In [1]:
import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.optim as optim
import torchvision.transforms as standard_transforms

import numpy as np
import glob

from data_loader import Rescale
from data_loader import RescaleT
from data_loader import RandomCrop
from data_loader import CenterCrop
from data_loader import ToTensor
from data_loader import ToTensorLab
from data_loader import SalObjDataset

from model import BASNet

import pytorch_ssim
import pytorch_iou


In [2]:
# Loss modules shared by every output of the network.
# nn.BCELoss already averages over all elements by default, so the deprecated
# `size_average=True` flag is dropped (it adds nothing and triggers a
# deprecation warning on newer PyTorch releases).
bce_loss = nn.BCELoss()
# pytorch_ssim / pytorch_iou are local modules; their own `size_average`
# keyword is kept exactly as before.
ssim_loss = pytorch_ssim.SSIM(window_size=11,size_average=True)
iou_loss = pytorch_iou.IOU(size_average=True)

def bce_ssim_loss(pred,target):
    """Return the hybrid loss of one prediction: BCE + (1 - SSIM) + IoU.

    Despite the name, three terms are summed. ``pred`` and ``target`` are
    forwarded unchanged to the module-level ``bce_loss``, ``ssim_loss`` and
    ``iou_loss`` callables.

    NOTE(review): presumably both arguments are same-shaped tensors with
    values in [0, 1] (BCE requires probabilities) -- confirm against the model.
    """
    # The SSIM module's result is subtracted from 1 before being added,
    # while the BCE and IoU results are added directly.
    return bce_loss(pred, target) + (1 - ssim_loss(pred, target)) + iou_loss(pred, target)

def muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, d7, labels_v):
    """Compute the hybrid loss of each of the eight network outputs and their sum.

    Args:
        d0, d1, ..., d7: the eight outputs returned by the network for one batch.
        labels_v: the ground-truth tensor every output is compared against.

    Returns:
        A ``(loss0, loss)`` tuple: the loss of ``d0`` alone, and the unweighted
        sum of all eight per-output losses (the value to back-propagate).
    """
    losses = [bce_ssim_loss(d, labels_v) for d in (d0, d1, d2, d3, d4, d5, d6, d7)]

    # Accumulate left to right, the same order as loss0 + loss1 + ... + loss7.
    loss = losses[0]
    for side_loss in losses[1:]:
        loss = loss + side_loss

    # `.item()` is the supported way to read a scalar loss; `loss.data[0]`
    # indexes a 0-dim tensor, which is deprecated in PyTorch 0.4 and an error
    # afterwards. l7 is now reported too, since it is part of the total.
    print("l0: %3f, l1: %3f, l2: %3f, l3: %3f, l4: %3f, l5: %3f, l6: %3f, l7: %3f\n"
          % tuple(side_loss.item() for side_loss in losses))

    return losses[0], loss



In [3]:
# --- Data locations -------------------------------------------------------
# These are filename *prefixes* (directory + leading part of the file name);
# the glob patterns below append the wildcard and the extension.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
tra_image_dir = 'C:/Users/yangz/Desktop/FireProjectBatchDownloadNew/PerimeterTrainingDataRescaledV4Filtered/TrainingRF-V4-'
lbl_image_dir = 'C:/Users/yangz/Desktop/FireProjectBatchDownloadNew/PerimeterLabelsRescaledV3Filtered/LabelRF-V3-'

# Directory checkpoints are read from and written to.
model_dir = 'C:/Users/yangz/Desktop/FireProject/BASNet/saved_models/basnet_bsi-Copy1/'

# --- Training configuration -----------------------------------------------
epoch_num = 100
batch_size_train = 2
batch_size_val = 1
train_num = 0
val_num = 0

# glob.glob() returns matches in arbitrary order. The two lists are built
# independently, so both are sorted to make the i-th image and the i-th label
# line up deterministically.
# NOTE(review): presumably SalObjDataset pairs images and labels by list
# index, and image/label files share the same suffix after the prefix --
# verify against data_loader.py.
tra_img_name_list = sorted(glob.glob(tra_image_dir + '*.jpg'))
tra_lbl_name_list = sorted(glob.glob(lbl_image_dir + '*.png'))

print("---")
print("train images: ", len(tra_img_name_list))
print("train labels: ", len(tra_lbl_name_list))
print("---")

train_num = len(tra_img_name_list)

# Transform pipeline applied to every sample: RescaleT(256), then
# RandomCrop(224), then ToTensorLab(flag=0).
salobj_dataset = SalObjDataset(
    img_name_list=tra_img_name_list,
    lbl_name_list=tra_lbl_name_list,
    transform=transforms.Compose([
        RescaleT(256),
        RandomCrop(224),
        ToTensorLab(flag=0)]))
salobj_dataloader = DataLoader(salobj_dataset, batch_size=batch_size_train, shuffle=True, num_workers=1)

---
train images:  792
train labels:  792
---


In [4]:
# Instantiate the network.
# NOTE(review): the positional arguments (3, 1) are presumably the input and
# output channel counts -- confirm in model.py.
net = BASNet(3, 1)

# Move the parameters to the GPU when PyTorch can see one; otherwise the
# model stays on the CPU.
if torch.cuda.is_available():
    print("Using CUDA")
    net = net.cuda()  # Module.cuda() returns the module itself


Using CUDA


In [5]:
print("---define optimizer...")

# Adam over all parameters of `net`, with every hyper-parameter spelled out.
optimizer = optim.Adam(
    net.parameters(),
    lr=1e-4,
    betas=(0.9, 0.999),
    eps=1e-8,
    weight_decay=0,
)

---define optimizer...


In [6]:
# Resume from a previously saved checkpoint in `model_dir`.
checkpoint_path = model_dir + "basnet_bsi_itr_37680_train_6.026760_tar_0.654454.pth"

# map_location="cpu" lets the file be read on a machine without CUDA (the
# training cell has an explicit CPU fallback). load_state_dict() then copies
# the values into the parameters on whatever device `net` already lives on.
net.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))

In [7]:
print("---start training...")
# The iteration counter resumes at the iteration number embedded in the
# checkpoint filename loaded earlier in the notebook.
ite_num = 37680
running_loss = 0.0        # sum of the total loss since the last checkpoint
running_tar_loss = 0.0    # sum of the d0-only loss since the last checkpoint
ite_num4val = 0           # iterations since the last checkpoint (averaging window)
save_frq = 2000           # write a checkpoint every `save_frq` iterations

# The device does not change during training, so query CUDA once instead of
# on every batch.
use_cuda = torch.cuda.is_available()

for epoch in range(0, epoch_num):
    print ("Starting epoch " + str(epoch+1))
    net.train()
    print ("Epoch " + str(epoch+1) + " loading complete")

    for i, data in enumerate(salobj_dataloader):
        ite_num = ite_num + 1
        ite_num4val = ite_num4val + 1

        inputs, labels = data['image'], data['label']

        inputs = inputs.type(torch.FloatTensor)
        labels = labels.type(torch.FloatTensor)

        # Plain tensors are enough: the deprecated Variable wrapper is a no-op
        # since PyTorch 0.4, and these tensors do not require gradients.
        if use_cuda:
            inputs_v, labels_v = inputs.cuda(), labels.cuda()
        else:
            inputs_v, labels_v = inputs, labels

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        d0, d1, d2, d3, d4, d5, d6, d7 = net(inputs_v)
        # loss2 is the loss of d0 alone; loss is the sum over all eight outputs.
        loss2, loss = muti_bce_loss_fusion(d0, d1, d2, d3, d4, d5, d6, d7, labels_v)

        loss.backward()
        optimizer.step()

        # accumulate statistics as Python floats
        running_loss += loss.item()
        running_tar_loss += loss2.item()

        # del temporary outputs and loss
        del d0, d1, d2, d3, d4, d5, d6, d7, loss2, loss

        # Clamp the sample counter so a partial last batch cannot report more
        # samples than the dataset holds.
        print("[epoch: %3d/%3d, batch: %5d/%5d, ite: %d] train loss: %3f, tar: %3f " % (
        epoch + 1, epoch_num, min((i + 1) * batch_size_train, train_num), train_num, ite_num, running_loss / ite_num4val, running_tar_loss / ite_num4val))

        if ite_num % save_frq == 0:  # periodic checkpoint

            torch.save(net.state_dict(), model_dir + "basnet_bsi_itr_%d_train_%3f_tar_%3f.pth" % (ite_num, running_loss / ite_num4val, running_tar_loss / ite_num4val))
            # restart the running averages after each checkpoint
            running_loss = 0.0
            running_tar_loss = 0.0
            net.train()  # resume train
            ite_num4val = 0

print('-------------Congratulations! Training Done!!!-------------')


---start training...
Starting epoch 1
Epoch 1 loading complete


  "See the documentation of nn.Upsample for details.".format(mode))


l0: 0.367253, l1: 0.377961, l2: 0.380019, l3: 0.379838, l4: 0.376151, l5: 0.425060, l6: 0.634960

[epoch:   1/100, batch:     2/  792, ite: 37681] train loss: 3.584586, tar: 0.367253 
l0: 0.758641, l1: 0.758841, l2: 0.757465, l3: 0.761288, l4: 0.776782, l5: 0.811884, l6: 1.141542

[epoch:   1/100, batch:     4/  792, ite: 37682] train loss: 5.212924, tar: 0.562947 
l0: 0.473289, l1: 0.473604, l2: 0.472447, l3: 0.486701, l4: 0.536152, l5: 0.647107, l6: 0.754759

[epoch:   1/100, batch:     6/  792, ite: 37683] train loss: 5.048249, tar: 0.533061 
l0: 0.629045, l1: 0.640108, l2: 0.632993, l3: 0.608896, l4: 0.576087, l5: 0.642872, l6: 0.687504

[epoch:   1/100, batch:     8/  792, ite: 37684] train loss: 5.097287, tar: 0.557057 
l0: 0.871136, l1: 0.871136, l2: 0.866297, l3: 0.859257, l4: 0.922133, l5: 0.880153, l6: 0.829255

[epoch:   1/100, batch:    10/  792, ite: 37685] train loss: 5.478712, tar: 0.619873 
l0: 0.585558, l1: 0.586165, l2: 0.583674, l3: 0.588250, l4: 0.585267, l5: 0.6679

[epoch:   1/100, batch:    90/  792, ite: 37725] train loss: 5.797239, tar: 0.631550 
l0: 0.677483, l1: 0.683855, l2: 0.684382, l3: 0.687657, l4: 0.701528, l5: 0.810135, l6: 0.967836

[epoch:   1/100, batch:    92/  792, ite: 37726] train loss: 5.806103, tar: 0.632549 
l0: 1.007214, l1: 1.001238, l2: 1.000159, l3: 1.021104, l4: 1.041667, l5: 1.024222, l6: 1.173312

[epoch:   1/100, batch:    94/  792, ite: 37727] train loss: 5.863662, tar: 0.640521 
l0: 0.946786, l1: 0.942809, l2: 0.950620, l3: 0.967885, l4: 1.013265, l5: 0.975959, l6: 1.678691

[epoch:   1/100, batch:    96/  792, ite: 37728] train loss: 5.934289, tar: 0.646901 
l0: 0.434200, l1: 0.435142, l2: 0.432904, l3: 0.434518, l4: 0.440164, l5: 0.522613, l6: 0.743333

[epoch:   1/100, batch:    98/  792, ite: 37729] train loss: 5.897840, tar: 0.642560 
l0: 0.567639, l1: 0.572970, l2: 0.582245, l3: 0.592165, l4: 0.675996, l5: 0.830135, l6: 0.981245

[epoch:   1/100, batch:   100/  792, ite: 37730] train loss: 5.897398, tar: 0.64

[epoch:   1/100, batch:   180/  792, ite: 37770] train loss: 5.816148, tar: 0.630615 
l0: 0.836859, l1: 0.833678, l2: 0.829198, l3: 0.836995, l4: 0.879282, l5: 0.999840, l6: 1.072367

[epoch:   1/100, batch:   182/  792, ite: 37771] train loss: 5.832632, tar: 0.632881 
l0: 0.619388, l1: 0.618659, l2: 0.617527, l3: 0.615045, l4: 0.638254, l5: 0.765310, l6: 0.921692

[epoch:   1/100, batch:   184/  792, ite: 37772] train loss: 5.832685, tar: 0.632734 
l0: 0.998807, l1: 1.003852, l2: 1.002972, l3: 1.004835, l4: 1.008461, l5: 1.100683, l6: 1.426455

[epoch:   1/100, batch:   186/  792, ite: 37773] train loss: 5.866330, tar: 0.636671 
l0: 0.822903, l1: 0.825570, l2: 0.828991, l3: 0.832494, l4: 0.821455, l5: 0.940885, l6: 1.191861

[epoch:   1/100, batch:   188/  792, ite: 37774] train loss: 5.882906, tar: 0.638652 
l0: 0.660094, l1: 0.668755, l2: 0.672010, l3: 0.676300, l4: 0.705167, l5: 0.777700, l6: 1.055940

[epoch:   1/100, batch:   190/  792, ite: 37775] train loss: 5.887274, tar: 0.63

[epoch:   1/100, batch:   270/  792, ite: 37815] train loss: 5.822132, tar: 0.629797 
l0: 0.412594, l1: 0.414418, l2: 0.413458, l3: 0.407946, l4: 0.454244, l5: 0.600474, l6: 0.667287

[epoch:   1/100, batch:   272/  792, ite: 37816] train loss: 5.809091, tar: 0.628200 
l0: 0.965538, l1: 0.964624, l2: 0.956514, l3: 0.953895, l4: 0.951579, l5: 1.013340, l6: 1.053885

[epoch:   1/100, batch:   274/  792, ite: 37817] train loss: 5.824439, tar: 0.630662 
l0: 0.602254, l1: 0.611942, l2: 0.622050, l3: 0.627917, l4: 0.677712, l5: 0.765326, l6: 0.868232

[epoch:   1/100, batch:   276/  792, ite: 37818] train loss: 5.823037, tar: 0.630456 
l0: 0.412716, l1: 0.413816, l2: 0.416706, l3: 0.436460, l4: 0.480608, l5: 0.592269, l6: 0.769568

[epoch:   1/100, batch:   278/  792, ite: 37819] train loss: 5.812321, tar: 0.628890 
l0: 0.605792, l1: 0.616620, l2: 0.617614, l3: 0.620155, l4: 0.673643, l5: 0.747589, l6: 0.879304

[epoch:   1/100, batch:   280/  792, ite: 37820] train loss: 5.810845, tar: 0.62

[epoch:   1/100, batch:   360/  792, ite: 37860] train loss: 5.875903, tar: 0.633921 
l0: 1.191678, l1: 1.203955, l2: 1.208101, l3: 1.213987, l4: 1.222142, l5: 1.268315, l6: 1.356491

[epoch:   1/100, batch:   362/  792, ite: 37861] train loss: 5.898925, tar: 0.637003 
l0: 0.722356, l1: 0.724817, l2: 0.720463, l3: 0.721958, l4: 0.726361, l5: 0.788250, l6: 0.913411

[epoch:   1/100, batch:   364/  792, ite: 37862] train loss: 5.900407, tar: 0.637472 
l0: 1.209477, l1: 1.220258, l2: 1.210484, l3: 1.217561, l4: 1.227900, l5: 1.382465, l6: 1.369892

[epoch:   1/100, batch:   366/  792, ite: 37863] train loss: 5.924027, tar: 0.640597 
l0: 0.764407, l1: 0.763384, l2: 0.762919, l3: 0.753064, l4: 0.758712, l5: 0.827337, l6: 1.273847

[epoch:   1/100, batch:   368/  792, ite: 37864] train loss: 5.930877, tar: 0.641270 
l0: 0.634373, l1: 0.638963, l2: 0.635164, l3: 0.631218, l4: 0.644402, l5: 0.665810, l6: 0.779649

[epoch:   1/100, batch:   370/  792, ite: 37865] train loss: 5.928847, tar: 0.64

[epoch:   1/100, batch:   450/  792, ite: 37905] train loss: 6.000841, tar: 0.650201 
l0: 0.443401, l1: 0.446126, l2: 0.445281, l3: 0.453259, l4: 0.460001, l5: 0.495458, l6: 0.603952

[epoch:   1/100, batch:   452/  792, ite: 37906] train loss: 5.992108, tar: 0.649286 
l0: 0.857163, l1: 0.872169, l2: 0.870768, l3: 0.872003, l4: 0.870032, l5: 0.938761, l6: 1.304876

[epoch:   1/100, batch:   454/  792, ite: 37907] train loss: 6.000966, tar: 0.650202 
l0: 0.637390, l1: 0.640078, l2: 0.631389, l3: 0.652481, l4: 0.679443, l5: 0.723574, l6: 0.879611

[epoch:   1/100, batch:   456/  792, ite: 37908] train loss: 5.999699, tar: 0.650145 
l0: 0.571183, l1: 0.571772, l2: 0.573349, l3: 0.585046, l4: 0.590999, l5: 0.615652, l6: 0.633684

[epoch:   1/100, batch:   458/  792, ite: 37909] train loss: 5.994674, tar: 0.649801 
l0: 0.642833, l1: 0.650337, l2: 0.652455, l3: 0.639507, l4: 0.687267, l5: 0.743894, l6: 0.802858

[epoch:   1/100, batch:   460/  792, ite: 37910] train loss: 5.993251, tar: 0.64

[epoch:   1/100, batch:   540/  792, ite: 37950] train loss: 5.904326, tar: 0.637983 
l0: 0.502150, l1: 0.506860, l2: 0.506504, l3: 0.513994, l4: 0.562424, l5: 0.709637, l6: 0.890672

[epoch:   1/100, batch:   542/  792, ite: 37951] train loss: 5.901187, tar: 0.637482 
l0: 0.690382, l1: 0.699648, l2: 0.690485, l3: 0.691137, l4: 0.719309, l5: 0.765067, l6: 0.912136

[epoch:   1/100, batch:   544/  792, ite: 37952] train loss: 5.902764, tar: 0.637676 
l0: 0.555420, l1: 0.556744, l2: 0.555516, l3: 0.552918, l4: 0.567474, l5: 0.632901, l6: 0.711975

[epoch:   1/100, batch:   546/  792, ite: 37953] train loss: 5.899368, tar: 0.637375 
l0: 0.396965, l1: 0.403378, l2: 0.402435, l3: 0.408158, l4: 0.449322, l5: 0.558802, l6: 0.826217

[epoch:   1/100, batch:   548/  792, ite: 37954] train loss: 5.893484, tar: 0.636498 
l0: 0.913914, l1: 0.923145, l2: 0.920171, l3: 0.909852, l4: 0.935923, l5: 0.975805, l6: 1.056738

[epoch:   1/100, batch:   550/  792, ite: 37955] train loss: 5.900084, tar: 0.63

[epoch:   1/100, batch:   630/  792, ite: 37995] train loss: 5.968999, tar: 0.646104 
l0: 0.922788, l1: 0.928690, l2: 0.924136, l3: 0.930102, l4: 0.988918, l5: 1.037977, l6: 1.067506

[epoch:   1/100, batch:   632/  792, ite: 37996] train loss: 5.975139, tar: 0.646980 
l0: 0.787950, l1: 0.789459, l2: 0.784384, l3: 0.772420, l4: 0.778369, l5: 0.854363, l6: 1.080987

[epoch:   1/100, batch:   634/  792, ite: 37997] train loss: 5.978630, tar: 0.647425 
l0: 0.690449, l1: 0.698441, l2: 0.697427, l3: 0.703919, l4: 0.773934, l5: 0.820074, l6: 1.067448

[epoch:   1/100, batch:   636/  792, ite: 37998] train loss: 5.980377, tar: 0.647560 
l0: 0.946640, l1: 0.953973, l2: 0.955806, l3: 0.947664, l4: 0.949176, l5: 1.063313, l6: 1.283405

[epoch:   1/100, batch:   638/  792, ite: 37999] train loss: 5.988181, tar: 0.648498 
l0: 0.854200, l1: 0.864126, l2: 0.864945, l3: 0.873402, l4: 0.914461, l5: 1.008642, l6: 1.330743

[epoch:   1/100, batch:   640/  792, ite: 38000] train loss: 5.994504, tar: 0.64

[epoch:   1/100, batch:   720/  792, ite: 38040] train loss: 5.736343, tar: 0.611880 
l0: 0.483319, l1: 0.486340, l2: 0.487284, l3: 0.498607, l4: 0.534459, l5: 0.604695, l6: 0.773035

[epoch:   1/100, batch:   722/  792, ite: 38041] train loss: 5.710140, tar: 0.608744 
l0: 0.413563, l1: 0.416943, l2: 0.411532, l3: 0.418926, l4: 0.451888, l5: 0.535925, l6: 0.821303

[epoch:   1/100, batch:   724/  792, ite: 38042] train loss: 5.678994, tar: 0.604097 
l0: 0.372353, l1: 0.376116, l2: 0.376326, l3: 0.395802, l4: 0.435716, l5: 0.526963, l6: 0.734127

[epoch:   1/100, batch:   726/  792, ite: 38043] train loss: 5.639570, tar: 0.598707 
l0: 1.022039, l1: 1.024695, l2: 1.032292, l3: 1.049131, l4: 1.073182, l5: 1.167998, l6: 1.181656

[epoch:   1/100, batch:   728/  792, ite: 38044] train loss: 5.710989, tar: 0.608329 
l0: 1.352743, l1: 1.342816, l2: 1.319078, l3: 1.315952, l4: 1.333226, l5: 1.470325, l6: 1.802966

[epoch:   1/100, batch:   730/  792, ite: 38045] train loss: 5.849095, tar: 0.62

l0: 0.533378, l1: 0.540418, l2: 0.538724, l3: 0.539012, l4: 0.546894, l5: 0.648745, l6: 0.749669

[epoch:   2/100, batch:    18/  792, ite: 38085] train loss: 5.961555, tar: 0.647753 
l0: 1.279619, l1: 1.292222, l2: 1.291006, l3: 1.296310, l4: 1.279136, l5: 1.378342, l6: 1.406930

[epoch:   2/100, batch:    20/  792, ite: 38086] train loss: 6.017040, tar: 0.655100 
l0: 0.669908, l1: 0.675275, l2: 0.677191, l3: 0.692868, l4: 0.738445, l5: 0.794120, l6: 0.981219

[epoch:   2/100, batch:    22/  792, ite: 38087] train loss: 6.019306, tar: 0.655270 
l0: 0.754395, l1: 0.763713, l2: 0.759130, l3: 0.746057, l4: 0.808203, l5: 0.948202, l6: 1.111782

[epoch:   2/100, batch:    24/  792, ite: 38088] train loss: 6.032085, tar: 0.656397 
l0: 0.826463, l1: 0.827744, l2: 0.830462, l3: 0.830040, l4: 0.817238, l5: 0.823603, l6: 1.035662

[epoch:   2/100, batch:    26/  792, ite: 38089] train loss: 6.043979, tar: 0.658307 
l0: 0.340831, l1: 0.344364, l2: 0.338603, l3: 0.347268, l4: 0.394204, l5: 0.4946

[epoch:   2/100, batch:   106/  792, ite: 38129] train loss: 6.012557, tar: 0.656797 
l0: 0.731408, l1: 0.734108, l2: 0.730926, l3: 0.734806, l4: 0.779321, l5: 0.868706, l6: 0.849685

[epoch:   2/100, batch:   108/  792, ite: 38130] train loss: 6.014908, tar: 0.657371 
l0: 0.756059, l1: 0.760586, l2: 0.771596, l3: 0.775242, l4: 0.771829, l5: 0.793149, l6: 0.951383

[epoch:   2/100, batch:   110/  792, ite: 38131] train loss: 6.019304, tar: 0.658125 
l0: 0.491475, l1: 0.501540, l2: 0.499623, l3: 0.503114, l4: 0.528600, l5: 0.563680, l6: 0.634298

[epoch:   2/100, batch:   112/  792, ite: 38132] train loss: 6.007289, tar: 0.656862 
l0: 0.523558, l1: 0.520698, l2: 0.526320, l3: 0.519572, l4: 0.499467, l5: 0.513699, l6: 0.580913

[epoch:   2/100, batch:   114/  792, ite: 38133] train loss: 5.994915, tar: 0.655860 
l0: 0.728212, l1: 0.735679, l2: 0.738012, l3: 0.744854, l4: 0.740840, l5: 0.839376, l6: 0.854325

[epoch:   2/100, batch:   116/  792, ite: 38134] train loss: 5.997388, tar: 0.65

[epoch:   2/100, batch:   196/  792, ite: 38174] train loss: 5.948353, tar: 0.649787 
l0: 0.774547, l1: 0.775012, l2: 0.769957, l3: 0.761655, l4: 0.775070, l5: 0.799822, l6: 0.797115

[epoch:   2/100, batch:   198/  792, ite: 38175] train loss: 5.950541, tar: 0.650500 
l0: 1.396157, l1: 1.420253, l2: 1.414359, l3: 1.437980, l4: 1.493588, l5: 1.499527, l6: 1.755268

[epoch:   2/100, batch:   200/  792, ite: 38176] train loss: 5.987083, tar: 0.654736 
l0: 0.503527, l1: 0.508958, l2: 0.504893, l3: 0.509959, l4: 0.532979, l5: 0.638862, l6: 0.815403

[epoch:   2/100, batch:   202/  792, ite: 38177] train loss: 5.980259, tar: 0.653882 
l0: 0.801918, l1: 0.804678, l2: 0.807082, l3: 0.804472, l4: 0.828312, l5: 0.902003, l6: 0.789422

[epoch:   2/100, batch:   204/  792, ite: 38178] train loss: 5.983521, tar: 0.654714 
l0: 1.468589, l1: 1.485393, l2: 1.484875, l3: 1.486100, l4: 1.570202, l5: 1.721841, l6: 1.909069

[epoch:   2/100, batch:   206/  792, ite: 38179] train loss: 6.023825, tar: 0.65

[epoch:   2/100, batch:   286/  792, ite: 38219] train loss: 5.995784, tar: 0.655779 
l0: 0.878544, l1: 0.893950, l2: 0.899731, l3: 0.902780, l4: 0.914483, l5: 1.033392, l6: 1.142064

[epoch:   2/100, batch:   288/  792, ite: 38220] train loss: 6.004305, tar: 0.656792 
l0: 0.810025, l1: 0.814282, l2: 0.813612, l3: 0.818845, l4: 0.830206, l5: 0.952527, l6: 1.178165

[epoch:   2/100, batch:   290/  792, ite: 38221] train loss: 6.011382, tar: 0.657485 
l0: 0.630468, l1: 0.620165, l2: 0.615446, l3: 0.623744, l4: 0.668263, l5: 0.813270, l6: 0.912935

[epoch:   2/100, batch:   292/  792, ite: 38222] train loss: 6.010743, tar: 0.657363 
l0: 0.367826, l1: 0.375780, l2: 0.370448, l3: 0.370460, l4: 0.406023, l5: 0.388006, l6: 0.445038

[epoch:   2/100, batch:   294/  792, ite: 38223] train loss: 5.998426, tar: 0.656065 
l0: 0.354858, l1: 0.360049, l2: 0.356832, l3: 0.363397, l4: 0.402626, l5: 0.474964, l6: 0.518501

[epoch:   2/100, batch:   296/  792, ite: 38224] train loss: 5.986942, tar: 0.65

[epoch:   2/100, batch:   376/  792, ite: 38264] train loss: 5.905422, tar: 0.643330 
l0: 0.402710, l1: 0.397610, l2: 0.393828, l3: 0.391869, l4: 0.385177, l5: 0.488321, l6: 0.582334

[epoch:   2/100, batch:   378/  792, ite: 38265] train loss: 5.896856, tar: 0.642422 
l0: 0.300849, l1: 0.307306, l2: 0.302503, l3: 0.310486, l4: 0.342375, l5: 0.452010, l6: 0.541295

[epoch:   2/100, batch:   380/  792, ite: 38266] train loss: 5.886225, tar: 0.641138 
l0: 0.792573, l1: 0.796200, l2: 0.795753, l3: 0.801552, l4: 0.814625, l5: 0.888299, l6: 0.952288

[epoch:   2/100, batch:   382/  792, ite: 38267] train loss: 5.889887, tar: 0.641705 
l0: 0.308992, l1: 0.317179, l2: 0.319221, l3: 0.325120, l4: 0.361378, l5: 0.547110, l6: 0.631017

[epoch:   2/100, batch:   384/  792, ite: 38268] train loss: 5.880822, tar: 0.640464 
l0: 0.431332, l1: 0.436108, l2: 0.438875, l3: 0.449742, l4: 0.550057, l5: 0.833519, l6: 0.963432

[epoch:   2/100, batch:   386/  792, ite: 38269] train loss: 5.877641, tar: 0.63

[epoch:   2/100, batch:   466/  792, ite: 38309] train loss: 5.855072, tar: 0.635787 
l0: 0.795987, l1: 0.797062, l2: 0.795431, l3: 0.799641, l4: 0.773672, l5: 0.816034, l6: 0.841673

[epoch:   2/100, batch:   468/  792, ite: 38310] train loss: 5.857268, tar: 0.636304 
l0: 0.732765, l1: 0.734812, l2: 0.739400, l3: 0.752866, l4: 0.803769, l5: 1.069328, l6: 1.196802

[epoch:   2/100, batch:   470/  792, ite: 38311] train loss: 5.861963, tar: 0.636614 
l0: 0.580530, l1: 0.587194, l2: 0.583553, l3: 0.601212, l4: 0.649470, l5: 0.774190, l6: 1.198371

[epoch:   2/100, batch:   472/  792, ite: 38312] train loss: 5.863280, tar: 0.636435 
l0: 0.446074, l1: 0.451626, l2: 0.453292, l3: 0.468054, l4: 0.555487, l5: 0.704487, l6: 0.802603

[epoch:   2/100, batch:   474/  792, ite: 38313] train loss: 5.859644, tar: 0.635826 
l0: 0.804549, l1: 0.799781, l2: 0.803518, l3: 0.807602, l4: 0.844303, l5: 1.037460, l6: 1.362177

[epoch:   2/100, batch:   476/  792, ite: 38314] train loss: 5.865882, tar: 0.63

[epoch:   2/100, batch:   556/  792, ite: 38354] train loss: 5.910014, tar: 0.640652 
l0: 0.384740, l1: 0.382147, l2: 0.378773, l3: 0.389091, l4: 0.437662, l5: 0.506461, l6: 0.646536

[epoch:   2/100, batch:   558/  792, ite: 38355] train loss: 5.904125, tar: 0.639931 
l0: 0.462174, l1: 0.464117, l2: 0.460836, l3: 0.455057, l4: 0.464645, l5: 0.562302, l6: 0.807981

[epoch:   2/100, batch:   560/  792, ite: 38356] train loss: 5.900534, tar: 0.639432 
l0: 0.571117, l1: 0.566813, l2: 0.555801, l3: 0.540783, l4: 0.567866, l5: 0.597652, l6: 0.795227

[epoch:   2/100, batch:   562/  792, ite: 38357] train loss: 5.898150, tar: 0.639241 
l0: 1.152187, l1: 1.157680, l2: 1.158943, l3: 1.173035, l4: 1.192510, l5: 1.270144, l6: 1.344516

[epoch:   2/100, batch:   564/  792, ite: 38358] train loss: 5.909251, tar: 0.640674 
l0: 0.422781, l1: 0.428430, l2: 0.422918, l3: 0.446120, l4: 0.502587, l5: 0.719407, l6: 0.839161

[epoch:   2/100, batch:   566/  792, ite: 38359] train loss: 5.905871, tar: 0.64

[epoch:   2/100, batch:   646/  792, ite: 38399] train loss: 5.895030, tar: 0.637213 
l0: 0.970397, l1: 0.981735, l2: 0.986775, l3: 1.000071, l4: 1.012023, l5: 1.043554, l6: 1.099262

[epoch:   2/100, batch:   648/  792, ite: 38400] train loss: 5.900952, tar: 0.638046 
l0: 0.554544, l1: 0.559678, l2: 0.561153, l3: 0.553910, l4: 0.580203, l5: 0.626495, l6: 0.791878

[epoch:   2/100, batch:   650/  792, ite: 38401] train loss: 5.899205, tar: 0.637838 
l0: 0.591689, l1: 0.599219, l2: 0.599051, l3: 0.626316, l4: 0.630424, l5: 0.765033, l6: 0.775328

[epoch:   2/100, batch:   652/  792, ite: 38402] train loss: 5.898065, tar: 0.637723 
l0: 0.965546, l1: 0.966715, l2: 0.963704, l3: 0.967164, l4: 0.976402, l5: 1.081355, l6: 1.447582

[epoch:   2/100, batch:   654/  792, ite: 38403] train loss: 5.905299, tar: 0.638536 
l0: 0.274168, l1: 0.286500, l2: 0.280186, l3: 0.281300, l4: 0.313833, l5: 0.401016, l6: 0.495914

[epoch:   2/100, batch:   656/  792, ite: 38404] train loss: 5.897815, tar: 0.63

[epoch:   2/100, batch:   736/  792, ite: 38444] train loss: 5.953357, tar: 0.645384 
l0: 0.580861, l1: 0.584895, l2: 0.585457, l3: 0.592606, l4: 0.630992, l5: 0.792808, l6: 0.960130

[epoch:   2/100, batch:   738/  792, ite: 38445] train loss: 5.952757, tar: 0.645239 
l0: 0.589069, l1: 0.598878, l2: 0.598315, l3: 0.620867, l4: 0.670759, l5: 0.705032, l6: 0.802664

[epoch:   2/100, batch:   740/  792, ite: 38446] train loss: 5.951741, tar: 0.645113 
l0: 0.532207, l1: 0.538065, l2: 0.543234, l3: 0.539838, l4: 0.571042, l5: 0.674027, l6: 0.749373

[epoch:   2/100, batch:   742/  792, ite: 38447] train loss: 5.949521, tar: 0.644860 
l0: 0.520826, l1: 0.530375, l2: 0.527553, l3: 0.525579, l4: 0.584815, l5: 0.717429, l6: 0.907083

[epoch:   2/100, batch:   744/  792, ite: 38448] train loss: 5.947932, tar: 0.644583 
l0: 0.584882, l1: 0.581750, l2: 0.579787, l3: 0.579173, l4: 0.602791, l5: 0.706703, l6: 0.843795

[epoch:   2/100, batch:   746/  792, ite: 38449] train loss: 5.946803, tar: 0.64

l0: 0.506959, l1: 0.507783, l2: 0.500623, l3: 0.516565, l4: 0.535903, l5: 0.632280, l6: 0.834828

[epoch:   3/100, batch:    34/  792, ite: 38489] train loss: 5.944831, tar: 0.644007 
l0: 0.376036, l1: 0.382934, l2: 0.384105, l3: 0.390961, l4: 0.423396, l5: 0.493525, l6: 0.684791

[epoch:   3/100, batch:    36/  792, ite: 38490] train loss: 5.940870, tar: 0.643460 
l0: 0.686517, l1: 0.693191, l2: 0.692956, l3: 0.709527, l4: 0.740308, l5: 0.889449, l6: 1.192942

[epoch:   3/100, batch:    38/  792, ite: 38491] train loss: 5.943113, tar: 0.643548 
l0: 0.640069, l1: 0.644884, l2: 0.644451, l3: 0.661262, l4: 0.698996, l5: 0.721813, l6: 0.951196

[epoch:   3/100, batch:    40/  792, ite: 38492] train loss: 5.943051, tar: 0.643541 
l0: 1.006691, l1: 1.013392, l2: 1.015831, l3: 1.026024, l4: 1.060887, l5: 1.133982, l6: 1.732720

[epoch:   3/100, batch:    42/  792, ite: 38493] train loss: 5.950747, tar: 0.644277 
l0: 0.436658, l1: 0.440707, l2: 0.437248, l3: 0.444918, l4: 0.492479, l5: 0.5950

[epoch:   3/100, batch:   122/  792, ite: 38533] train loss: 5.927185, tar: 0.640829 
l0: 0.320295, l1: 0.324940, l2: 0.320002, l3: 0.326067, l4: 0.352217, l5: 0.379477, l6: 0.521039

[epoch:   3/100, batch:   124/  792, ite: 38534] train loss: 5.921849, tar: 0.640228 
l0: 0.714824, l1: 0.720008, l2: 0.724477, l3: 0.743674, l4: 0.766447, l5: 0.837666, l6: 1.007907

[epoch:   3/100, batch:   126/  792, ite: 38535] train loss: 5.923411, tar: 0.640368 
l0: 1.096551, l1: 1.096318, l2: 1.088421, l3: 1.098267, l4: 1.095520, l5: 1.075030, l6: 1.097038

[epoch:   3/100, batch:   128/  792, ite: 38536] train loss: 5.928717, tar: 0.641219 
l0: 0.501019, l1: 0.501040, l2: 0.503047, l3: 0.505205, l4: 0.523418, l5: 0.585302, l6: 0.595947

[epoch:   3/100, batch:   130/  792, ite: 38537] train loss: 5.925752, tar: 0.640958 
l0: 0.871171, l1: 0.875167, l2: 0.862236, l3: 0.864037, l4: 0.904418, l5: 1.125870, l6: 1.519229

[epoch:   3/100, batch:   132/  792, ite: 38538] train loss: 5.930444, tar: 0.64

[epoch:   3/100, batch:   212/  792, ite: 38578] train loss: 5.930314, tar: 0.639574 
l0: 0.847662, l1: 0.859446, l2: 0.868218, l3: 0.867285, l4: 0.901296, l5: 0.935317, l6: 1.121587

[epoch:   3/100, batch:   214/  792, ite: 38579] train loss: 5.933300, tar: 0.639934 
l0: 0.426951, l1: 0.428500, l2: 0.432663, l3: 0.444704, l4: 0.481512, l5: 0.606690, l6: 0.821834

[epoch:   3/100, batch:   216/  792, ite: 38580] train loss: 5.930806, tar: 0.639566 
l0: 0.998059, l1: 0.998666, l2: 0.998916, l3: 1.011538, l4: 1.007353, l5: 1.137618, l6: 1.172959

[epoch:   3/100, batch:   218/  792, ite: 38581] train loss: 5.935417, tar: 0.640183 
l0: 0.452728, l1: 0.454787, l2: 0.456350, l3: 0.462109, l4: 0.461124, l5: 0.473496, l6: 0.606145

[epoch:   3/100, batch:   220/  792, ite: 38582] train loss: 5.932023, tar: 0.639861 
l0: 0.755574, l1: 0.771421, l2: 0.772561, l3: 0.772713, l4: 0.797882, l5: 0.865928, l6: 0.975883

[epoch:   3/100, batch:   222/  792, ite: 38583] train loss: 5.933479, tar: 0.64

[epoch:   3/100, batch:   302/  792, ite: 38623] train loss: 5.946991, tar: 0.641750 
l0: 0.459117, l1: 0.461289, l2: 0.472476, l3: 0.487240, l4: 0.488193, l5: 0.552644, l6: 0.654896

[epoch:   3/100, batch:   304/  792, ite: 38624] train loss: 5.944380, tar: 0.641458 
l0: 0.563750, l1: 0.568189, l2: 0.572044, l3: 0.585051, l4: 0.585597, l5: 0.670686, l6: 0.807906

[epoch:   3/100, batch:   306/  792, ite: 38625] train loss: 5.943310, tar: 0.641333 
l0: 0.571472, l1: 0.576518, l2: 0.577849, l3: 0.580659, l4: 0.587384, l5: 0.746997, l6: 0.834383

[epoch:   3/100, batch:   308/  792, ite: 38626] train loss: 5.942360, tar: 0.641222 
l0: 0.590660, l1: 0.585240, l2: 0.589702, l3: 0.583972, l4: 0.592223, l5: 0.638900, l6: 0.700805

[epoch:   3/100, batch:   310/  792, ite: 38627] train loss: 5.941032, tar: 0.641141 
l0: 0.857177, l1: 0.868155, l2: 0.861981, l3: 0.863155, l4: 0.903802, l5: 0.951584, l6: 0.944916

[epoch:   3/100, batch:   312/  792, ite: 38628] train loss: 5.943128, tar: 0.64

[epoch:   3/100, batch:   392/  792, ite: 38668] train loss: 5.939209, tar: 0.641363 
l0: 0.664287, l1: 0.658820, l2: 0.660194, l3: 0.660628, l4: 0.673754, l5: 0.779057, l6: 0.860449

[epoch:   3/100, batch:   394/  792, ite: 38669] train loss: 5.939098, tar: 0.641397 
l0: 0.272846, l1: 0.278594, l2: 0.281500, l3: 0.286359, l4: 0.312131, l5: 0.405711, l6: 0.474467

[epoch:   3/100, batch:   396/  792, ite: 38670] train loss: 5.934495, tar: 0.640847 
l0: 0.971339, l1: 0.960356, l2: 0.955454, l3: 0.932257, l4: 0.929725, l5: 0.951774, l6: 1.060868

[epoch:   3/100, batch:   398/  792, ite: 38671] train loss: 5.937567, tar: 0.641339 
l0: 0.368019, l1: 0.373587, l2: 0.373803, l3: 0.388236, l4: 0.398565, l5: 0.501671, l6: 0.675336

[epoch:   3/100, batch:   400/  792, ite: 38672] train loss: 5.934409, tar: 0.640933 
l0: 0.297836, l1: 0.304638, l2: 0.307660, l3: 0.308672, l4: 0.314350, l5: 0.399054, l6: 0.496168

[epoch:   3/100, batch:   402/  792, ite: 38673] train loss: 5.930024, tar: 0.64

[epoch:   3/100, batch:   482/  792, ite: 38713] train loss: 5.910716, tar: 0.638177 
l0: 1.247208, l1: 1.271255, l2: 1.267448, l3: 1.274351, l4: 1.327996, l5: 1.493820, l6: 1.901890

[epoch:   3/100, batch:   484/  792, ite: 38714] train loss: 5.918957, tar: 0.639030 
l0: 1.050122, l1: 1.059866, l2: 1.057306, l3: 1.049505, l4: 1.046185, l5: 1.059360, l6: 1.252874

[epoch:   3/100, batch:   486/  792, ite: 38715] train loss: 5.923173, tar: 0.639605 
l0: 0.781452, l1: 0.789079, l2: 0.785059, l3: 0.785870, l4: 0.830207, l5: 0.938164, l6: 1.054493

[epoch:   3/100, batch:   488/  792, ite: 38716] train loss: 5.924748, tar: 0.639804 
l0: 0.702230, l1: 0.711357, l2: 0.716939, l3: 0.730193, l4: 0.738532, l5: 0.833330, l6: 0.997105

[epoch:   3/100, batch:   490/  792, ite: 38717] train loss: 5.925460, tar: 0.639891 
l0: 0.334187, l1: 0.336757, l2: 0.333882, l3: 0.340282, l4: 0.404043, l5: 0.490355, l6: 0.613644

[epoch:   3/100, batch:   492/  792, ite: 38718] train loss: 5.922183, tar: 0.63

[epoch:   3/100, batch:   572/  792, ite: 38758] train loss: 5.925450, tar: 0.639257 
l0: 0.774916, l1: 0.777032, l2: 0.776273, l3: 0.778343, l4: 0.798253, l5: 0.859860, l6: 1.001595

[epoch:   3/100, batch:   574/  792, ite: 38759] train loss: 5.926569, tar: 0.639436 
l0: 0.569660, l1: 0.572728, l2: 0.570839, l3: 0.569849, l4: 0.591946, l5: 0.615099, l6: 0.792770

[epoch:   3/100, batch:   576/  792, ite: 38760] train loss: 5.925466, tar: 0.639344 
l0: 0.665385, l1: 0.658847, l2: 0.657409, l3: 0.664030, l4: 0.675604, l5: 0.680802, l6: 0.726900

[epoch:   3/100, batch:   578/  792, ite: 38761] train loss: 5.924991, tar: 0.639378 
l0: 0.307543, l1: 0.310058, l2: 0.311659, l3: 0.328122, l4: 0.355761, l5: 0.406038, l6: 0.522840

[epoch:   3/100, batch:   580/  792, ite: 38762] train loss: 5.921178, tar: 0.638942 
l0: 0.803084, l1: 0.799468, l2: 0.802603, l3: 0.799168, l4: 0.850395, l5: 0.963412, l6: 0.957811

[epoch:   3/100, batch:   582/  792, ite: 38763] train loss: 5.922687, tar: 0.63

[epoch:   3/100, batch:   662/  792, ite: 38803] train loss: 5.922059, tar: 0.638957 
l0: 0.363086, l1: 0.370504, l2: 0.366745, l3: 0.373283, l4: 0.378891, l5: 0.407105, l6: 0.509585

[epoch:   3/100, batch:   664/  792, ite: 38804] train loss: 5.918862, tar: 0.638613 
l0: 0.580297, l1: 0.585760, l2: 0.587631, l3: 0.601148, l4: 0.644683, l5: 0.810921, l6: 0.828644

[epoch:   3/100, batch:   666/  792, ite: 38805] train loss: 5.918358, tar: 0.638541 
l0: 0.525665, l1: 0.529558, l2: 0.530257, l3: 0.538188, l4: 0.569044, l5: 0.633753, l6: 0.805307

[epoch:   3/100, batch:   668/  792, ite: 38806] train loss: 5.917285, tar: 0.638401 
l0: 0.708852, l1: 0.717553, l2: 0.718904, l3: 0.731723, l4: 0.750991, l5: 0.812781, l6: 1.031961

[epoch:   3/100, batch:   670/  792, ite: 38807] train loss: 5.918087, tar: 0.638488 
l0: 0.293467, l1: 0.292253, l2: 0.293088, l3: 0.296431, l4: 0.326390, l5: 0.409283, l6: 0.588493

[epoch:   3/100, batch:   672/  792, ite: 38808] train loss: 5.914617, tar: 0.63

[epoch:   3/100, batch:   752/  792, ite: 38848] train loss: 5.904275, tar: 0.637020 
l0: 0.342717, l1: 0.344269, l2: 0.341524, l3: 0.346334, l4: 0.358725, l5: 0.520496, l6: 0.680928

[epoch:   3/100, batch:   754/  792, ite: 38849] train loss: 5.901687, tar: 0.636673 
l0: 0.799213, l1: 0.799718, l2: 0.800881, l3: 0.810902, l4: 0.853056, l5: 0.928494, l6: 1.059515

[epoch:   3/100, batch:   756/  792, ite: 38850] train loss: 5.903194, tar: 0.636865 
l0: 0.692860, l1: 0.694005, l2: 0.696221, l3: 0.702781, l4: 0.754986, l5: 0.847996, l6: 1.484573

[epoch:   3/100, batch:   758/  792, ite: 38851] train loss: 5.905076, tar: 0.636930 
l0: 0.660504, l1: 0.662833, l2: 0.658141, l3: 0.659109, l4: 0.699827, l5: 0.843814, l6: 1.240583

[epoch:   3/100, batch:   760/  792, ite: 38852] train loss: 5.905994, tar: 0.636958 
l0: 0.379014, l1: 0.386166, l2: 0.382639, l3: 0.386454, l4: 0.436293, l5: 0.579635, l6: 0.635683

[epoch:   3/100, batch:   762/  792, ite: 38853] train loss: 5.903691, tar: 0.63

l0: 0.731308, l1: 0.735048, l2: 0.737239, l3: 0.737250, l4: 0.772354, l5: 0.833182, l6: 1.157742

[epoch:   4/100, batch:    50/  792, ite: 38893] train loss: 5.892419, tar: 0.634887 
l0: 0.840648, l1: 0.848639, l2: 0.843052, l3: 0.842371, l4: 0.876893, l5: 0.884563, l6: 1.097489

[epoch:   4/100, batch:    52/  792, ite: 38894] train loss: 5.894041, tar: 0.635118 
l0: 1.223472, l1: 1.226182, l2: 1.228240, l3: 1.239766, l4: 1.293202, l5: 1.273012, l6: 1.337271

[epoch:   4/100, batch:    54/  792, ite: 38895] train loss: 5.898961, tar: 0.635775 
l0: 0.985821, l1: 0.989241, l2: 0.990869, l3: 1.004302, l4: 1.039068, l5: 1.078378, l6: 1.198897

[epoch:   4/100, batch:    56/  792, ite: 38896] train loss: 5.901814, tar: 0.636166 
l0: 0.608874, l1: 0.613100, l2: 0.613519, l3: 0.620835, l4: 0.632333, l5: 0.689578, l6: 0.936927

[epoch:   4/100, batch:    58/  792, ite: 38897] train loss: 5.901575, tar: 0.636135 
l0: 0.694684, l1: 0.707052, l2: 0.703760, l3: 0.714363, l4: 0.719374, l5: 0.7657

[epoch:   4/100, batch:   138/  792, ite: 38937] train loss: 5.900724, tar: 0.636044 
l0: 0.432721, l1: 0.431310, l2: 0.431583, l3: 0.435832, l4: 0.471953, l5: 0.559473, l6: 0.640617

[epoch:   4/100, batch:   140/  792, ite: 38938] train loss: 5.898700, tar: 0.635827 
l0: 0.492258, l1: 0.486339, l2: 0.486868, l3: 0.491501, l4: 0.496888, l5: 0.503543, l6: 0.636672

[epoch:   4/100, batch:   142/  792, ite: 38939] train loss: 5.896937, tar: 0.635674 
l0: 0.394097, l1: 0.399499, l2: 0.398248, l3: 0.404071, l4: 0.456233, l5: 0.595213, l6: 0.851904

[epoch:   4/100, batch:   144/  792, ite: 38940] train loss: 5.895322, tar: 0.635417 
l0: 0.714211, l1: 0.713120, l2: 0.715700, l3: 0.739175, l4: 0.777708, l5: 0.887021, l6: 1.062341

[epoch:   4/100, batch:   146/  792, ite: 38941] train loss: 5.896231, tar: 0.635501 
l0: 0.521638, l1: 0.529015, l2: 0.530963, l3: 0.536549, l4: 0.613913, l5: 0.738359, l6: 0.793476

[epoch:   4/100, batch:   148/  792, ite: 38942] train loss: 5.895412, tar: 0.63

[epoch:   4/100, batch:   228/  792, ite: 38982] train loss: 5.875549, tar: 0.633281 
l0: 0.648541, l1: 0.655282, l2: 0.658167, l3: 0.658114, l4: 0.690923, l5: 0.823780, l6: 0.900562

[epoch:   4/100, batch:   230/  792, ite: 38983] train loss: 5.875601, tar: 0.633297 
l0: 0.581192, l1: 0.588345, l2: 0.587079, l3: 0.594816, l4: 0.596154, l5: 0.646179, l6: 0.821222

[epoch:   4/100, batch:   232/  792, ite: 38984] train loss: 5.875106, tar: 0.633244 
l0: 0.564556, l1: 0.567201, l2: 0.566333, l3: 0.571624, l4: 0.596476, l5: 0.700832, l6: 1.027564

[epoch:   4/100, batch:   234/  792, ite: 38985] train loss: 5.874829, tar: 0.633174 
l0: 1.126081, l1: 1.144624, l2: 1.131648, l3: 1.142929, l4: 1.218880, l5: 1.407292, l6: 1.626128

[epoch:   4/100, batch:   236/  792, ite: 38986] train loss: 5.879513, tar: 0.633674 
l0: 0.576247, l1: 0.574692, l2: 0.573295, l3: 0.575396, l4: 0.585696, l5: 0.687121, l6: 0.943764

[epoch:   4/100, batch:   238/  792, ite: 38987] train loss: 5.879129, tar: 0.63

[epoch:   4/100, batch:   318/  792, ite: 39027] train loss: 5.882052, tar: 0.634262 
l0: 0.331398, l1: 0.330870, l2: 0.327869, l3: 0.343212, l4: 0.401135, l5: 0.478316, l6: 0.566110

[epoch:   4/100, batch:   320/  792, ite: 39028] train loss: 5.879641, tar: 0.633967 
l0: 0.408380, l1: 0.414707, l2: 0.413537, l3: 0.419378, l4: 0.459142, l5: 0.569260, l6: 0.715975

[epoch:   4/100, batch:   322/  792, ite: 39029] train loss: 5.877949, tar: 0.633748 
l0: 0.907560, l1: 0.917380, l2: 0.917118, l3: 0.927649, l4: 0.937091, l5: 0.983155, l6: 1.525935

[epoch:   4/100, batch:   324/  792, ite: 39030] train loss: 5.880680, tar: 0.634014 
l0: 0.631605, l1: 0.635847, l2: 0.636009, l3: 0.635581, l4: 0.634814, l5: 0.704895, l6: 0.718664

[epoch:   4/100, batch:   326/  792, ite: 39031] train loss: 5.880292, tar: 0.634012 
l0: 0.490207, l1: 0.493227, l2: 0.488975, l3: 0.501836, l4: 0.547929, l5: 0.618882, l6: 0.794192

[epoch:   4/100, batch:   328/  792, ite: 39032] train loss: 5.879427, tar: 0.63

[epoch:   4/100, batch:   408/  792, ite: 39072] train loss: 5.877274, tar: 0.633467 
l0: 0.876992, l1: 0.877971, l2: 0.877522, l3: 0.857376, l4: 0.899396, l5: 1.046184, l6: 1.453398

[epoch:   4/100, batch:   410/  792, ite: 39073] train loss: 5.879564, tar: 0.633694 
l0: 1.014641, l1: 1.021833, l2: 1.023852, l3: 1.031696, l4: 1.044400, l5: 1.096072, l6: 1.208816

[epoch:   4/100, batch:   412/  792, ite: 39074] train loss: 5.882110, tar: 0.634048 
l0: 0.565176, l1: 0.560655, l2: 0.559849, l3: 0.571198, l4: 0.591802, l5: 0.761452, l6: 0.785093

[epoch:   4/100, batch:   414/  792, ite: 39075] train loss: 5.881510, tar: 0.633984 
l0: 1.026472, l1: 1.031897, l2: 1.034893, l3: 1.041956, l4: 1.078662, l5: 1.204375, l6: 1.393905

[epoch:   4/100, batch:   416/  792, ite: 39076] train loss: 5.884739, tar: 0.634349 
l0: 0.500649, l1: 0.502736, l2: 0.499707, l3: 0.505857, l4: 0.542188, l5: 0.690129, l6: 0.747658

[epoch:   4/100, batch:   418/  792, ite: 39077] train loss: 5.883819, tar: 0.63

[epoch:   4/100, batch:   498/  792, ite: 39117] train loss: 5.879377, tar: 0.633726 
l0: 0.582587, l1: 0.586557, l2: 0.575500, l3: 0.579206, l4: 0.605547, l5: 0.648054, l6: 0.698608

[epoch:   4/100, batch:   500/  792, ite: 39118] train loss: 5.878585, tar: 0.633680 
l0: 0.305773, l1: 0.309913, l2: 0.317101, l3: 0.322660, l4: 0.354219, l5: 0.437528, l6: 0.553862

[epoch:   4/100, batch:   502/  792, ite: 39119] train loss: 5.876185, tar: 0.633387 
l0: 0.780774, l1: 0.779898, l2: 0.786774, l3: 0.800177, l4: 0.817087, l5: 0.812361, l6: 1.200376

[epoch:   4/100, batch:   504/  792, ite: 39120] train loss: 5.877451, tar: 0.633519 
l0: 0.404145, l1: 0.405753, l2: 0.400814, l3: 0.393496, l4: 0.414887, l5: 0.497970, l6: 0.609155

[epoch:   4/100, batch:   506/  792, ite: 39121] train loss: 5.875647, tar: 0.633314 
l0: 0.566680, l1: 0.565394, l2: 0.569925, l3: 0.576841, l4: 0.587598, l5: 0.651564, l6: 0.777117

[epoch:   4/100, batch:   508/  792, ite: 39122] train loss: 5.875002, tar: 0.63

[epoch:   4/100, batch:   588/  792, ite: 39162] train loss: 5.868130, tar: 0.632210 
l0: 0.504711, l1: 0.522258, l2: 0.524795, l3: 0.535956, l4: 0.582429, l5: 0.652318, l6: 0.940632

[epoch:   4/100, batch:   590/  792, ite: 39163] train loss: 5.867587, tar: 0.632101 
l0: 0.825638, l1: 0.829694, l2: 0.828333, l3: 0.842232, l4: 0.913776, l5: 1.076526, l6: 1.348436

[epoch:   4/100, batch:   592/  792, ite: 39164] train loss: 5.869512, tar: 0.632267 
l0: 0.659941, l1: 0.666981, l2: 0.668725, l3: 0.680601, l4: 0.713933, l5: 0.723029, l6: 0.836728

[epoch:   4/100, batch:   594/  792, ite: 39165] train loss: 5.869537, tar: 0.632291 
l0: 0.471405, l1: 0.474621, l2: 0.473620, l3: 0.482045, l4: 0.531576, l5: 0.648104, l6: 0.904730

[epoch:   4/100, batch:   596/  792, ite: 39166] train loss: 5.868715, tar: 0.632153 
l0: 0.420320, l1: 0.423355, l2: 0.423140, l3: 0.426313, l4: 0.455631, l5: 0.549445, l6: 0.683557

[epoch:   4/100, batch:   598/  792, ite: 39167] train loss: 5.867237, tar: 0.63

[epoch:   4/100, batch:   678/  792, ite: 39207] train loss: 5.870850, tar: 0.632242 
l0: 0.620223, l1: 0.629192, l2: 0.630250, l3: 0.643830, l4: 0.667164, l5: 0.767046, l6: 1.013829

[epoch:   4/100, batch:   680/  792, ite: 39208] train loss: 5.871003, tar: 0.632232 
l0: 0.595698, l1: 0.597641, l2: 0.588892, l3: 0.585977, l4: 0.595077, l5: 0.663752, l6: 0.718764

[epoch:   4/100, batch:   682/  792, ite: 39209] train loss: 5.870484, tar: 0.632202 
l0: 0.459200, l1: 0.466244, l2: 0.466215, l3: 0.464907, l4: 0.516353, l5: 0.657644, l6: 0.743090

[epoch:   4/100, batch:   684/  792, ite: 39210] train loss: 5.869456, tar: 0.632059 
l0: 0.525227, l1: 0.531630, l2: 0.531765, l3: 0.533439, l4: 0.531693, l5: 0.649781, l6: 0.868005

[epoch:   4/100, batch:   686/  792, ite: 39211] train loss: 5.868773, tar: 0.631971 
l0: 0.417396, l1: 0.423251, l2: 0.421968, l3: 0.431205, l4: 0.449907, l5: 0.540283, l6: 0.606275

[epoch:   4/100, batch:   688/  792, ite: 39212] train loss: 5.867159, tar: 0.63

[epoch:   4/100, batch:   768/  792, ite: 39252] train loss: 5.859219, tar: 0.631219 
l0: 0.800805, l1: 0.796036, l2: 0.788960, l3: 0.783943, l4: 0.811090, l5: 0.796908, l6: 0.904159

[epoch:   4/100, batch:   770/  792, ite: 39253] train loss: 5.859796, tar: 0.631354 
l0: 0.875882, l1: 0.879656, l2: 0.878026, l3: 0.876512, l4: 0.928952, l5: 1.124831, l6: 1.745220

[epoch:   4/100, batch:   772/  792, ite: 39254] train loss: 5.862449, tar: 0.631549 
l0: 0.341210, l1: 0.350505, l2: 0.348996, l3: 0.344316, l4: 0.348199, l5: 0.474405, l6: 0.572300

[epoch:   4/100, batch:   774/  792, ite: 39255] train loss: 5.860493, tar: 0.631318 
l0: 0.457969, l1: 0.462820, l2: 0.480151, l3: 0.494120, l4: 0.523898, l5: 0.571731, l6: 0.801015

[epoch:   4/100, batch:   776/  792, ite: 39256] train loss: 5.859506, tar: 0.631180 
l0: 1.082240, l1: 1.098839, l2: 1.083358, l3: 1.068416, l4: 1.088353, l5: 1.263752, l6: 1.258490

[epoch:   4/100, batch:   778/  792, ite: 39257] train loss: 5.862270, tar: 0.63

l0: 0.337634, l1: 0.341895, l2: 0.340511, l3: 0.348759, l4: 0.380899, l5: 0.430798, l6: 0.516598

[epoch:   5/100, batch:    66/  792, ite: 39297] train loss: 5.858964, tar: 0.631166 
l0: 0.820850, l1: 0.832928, l2: 0.838869, l3: 0.842007, l4: 0.878531, l5: 0.997637, l6: 0.999254

[epoch:   5/100, batch:    68/  792, ite: 39298] train loss: 5.860072, tar: 0.631313 
l0: 0.739620, l1: 0.745743, l2: 0.751648, l3: 0.764002, l4: 0.782384, l5: 0.844789, l6: 1.106495

[epoch:   5/100, batch:    70/  792, ite: 39299] train loss: 5.860825, tar: 0.631396 
l0: 0.677756, l1: 0.680770, l2: 0.679049, l3: 0.688145, l4: 0.717781, l5: 0.855247, l6: 1.229095

[epoch:   5/100, batch:    72/  792, ite: 39300] train loss: 5.861548, tar: 0.631432 
l0: 0.695521, l1: 0.708188, l2: 0.709776, l3: 0.712008, l4: 0.755142, l5: 0.872553, l6: 1.014658

[epoch:   5/100, batch:    74/  792, ite: 39301] train loss: 5.862271, tar: 0.631481 
l0: 0.688859, l1: 0.695327, l2: 0.700716, l3: 0.717362, l4: 0.777662, l5: 0.9662

[epoch:   5/100, batch:   154/  792, ite: 39341] train loss: 5.862450, tar: 0.631452 
l0: 0.454661, l1: 0.460959, l2: 0.457730, l3: 0.455479, l4: 0.472404, l5: 0.602846, l6: 0.799831

[epoch:   5/100, batch:   156/  792, ite: 39342] train loss: 5.861460, tar: 0.631320 
l0: 0.548904, l1: 0.555539, l2: 0.551319, l3: 0.555340, l4: 0.546628, l5: 0.566134, l6: 0.595412

[epoch:   5/100, batch:   158/  792, ite: 39343] train loss: 5.860530, tar: 0.631259 
l0: 0.453073, l1: 0.459864, l2: 0.462281, l3: 0.465422, l4: 0.485960, l5: 0.550551, l6: 0.651127

[epoch:   5/100, batch:   160/  792, ite: 39344] train loss: 5.859388, tar: 0.631126 
l0: 0.319534, l1: 0.321173, l2: 0.318508, l3: 0.313478, l4: 0.337088, l5: 0.403649, l6: 0.602096

[epoch:   5/100, batch:   162/  792, ite: 39345] train loss: 5.857412, tar: 0.630894 
l0: 1.153945, l1: 1.152234, l2: 1.149422, l3: 1.141253, l4: 1.162123, l5: 1.281602, l6: 1.326031

[epoch:   5/100, batch:   164/  792, ite: 39346] train loss: 5.860229, tar: 0.63

[epoch:   5/100, batch:   244/  792, ite: 39386] train loss: 5.846253, tar: 0.629400 
l0: 0.506459, l1: 0.509523, l2: 0.513397, l3: 0.518637, l4: 0.534271, l5: 0.604741, l6: 0.740768

[epoch:   5/100, batch:   246/  792, ite: 39387] train loss: 5.845438, tar: 0.629312 
l0: 0.345222, l1: 0.351420, l2: 0.351699, l3: 0.364444, l4: 0.424911, l5: 0.517298, l6: 0.794844

[epoch:   5/100, batch:   248/  792, ite: 39388] train loss: 5.844084, tar: 0.629107 
l0: 0.935267, l1: 0.934876, l2: 0.929332, l3: 0.925806, l4: 0.933334, l5: 1.015213, l6: 1.214888

[epoch:   5/100, batch:   250/  792, ite: 39389] train loss: 5.845804, tar: 0.629327 
l0: 0.552552, l1: 0.556262, l2: 0.552997, l3: 0.564680, l4: 0.569263, l5: 0.624763, l6: 0.751052

[epoch:   5/100, batch:   252/  792, ite: 39390] train loss: 5.845170, tar: 0.629272 
l0: 0.456474, l1: 0.462279, l2: 0.464338, l3: 0.465835, l4: 0.466624, l5: 0.550851, l6: 0.693224

[epoch:   5/100, batch:   254/  792, ite: 39391] train loss: 5.844033, tar: 0.62

[epoch:   5/100, batch:   334/  792, ite: 39431] train loss: 5.845545, tar: 0.629498 
l0: 0.460546, l1: 0.462913, l2: 0.460549, l3: 0.462144, l4: 0.472844, l5: 0.589394, l6: 0.659019

[epoch:   5/100, batch:   336/  792, ite: 39432] train loss: 5.844564, tar: 0.629380 
l0: 1.041891, l1: 1.062392, l2: 1.051648, l3: 1.049239, l4: 1.066280, l5: 1.151390, l6: 1.309899

[epoch:   5/100, batch:   338/  792, ite: 39433] train loss: 5.846820, tar: 0.629668 
l0: 0.683845, l1: 0.682467, l2: 0.689093, l3: 0.693988, l4: 0.680538, l5: 0.732509, l6: 0.943184

[epoch:   5/100, batch:   340/  792, ite: 39434] train loss: 5.846985, tar: 0.629706 
l0: 0.544180, l1: 0.547025, l2: 0.550582, l3: 0.546047, l4: 0.563958, l5: 0.754703, l6: 1.037623

[epoch:   5/100, batch:   342/  792, ite: 39435] train loss: 5.846809, tar: 0.629646 
l0: 0.481928, l1: 0.491997, l2: 0.493492, l3: 0.493164, l4: 0.516571, l5: 0.599983, l6: 0.728331

[epoch:   5/100, batch:   344/  792, ite: 39436] train loss: 5.846002, tar: 0.62

[epoch:   5/100, batch:   424/  792, ite: 39476] train loss: 5.839944, tar: 0.628927 
l0: 0.665860, l1: 0.672262, l2: 0.671189, l3: 0.675110, l4: 0.715405, l5: 0.770480, l6: 0.862747

[epoch:   5/100, batch:   426/  792, ite: 39477] train loss: 5.840085, tar: 0.628952 
l0: 0.507263, l1: 0.507957, l2: 0.509682, l3: 0.521176, l4: 0.620286, l5: 0.675457, l6: 1.492200

[epoch:   5/100, batch:   428/  792, ite: 39478] train loss: 5.840420, tar: 0.628870 
l0: 0.823758, l1: 0.823931, l2: 0.829801, l3: 0.822169, l4: 0.835416, l5: 0.988039, l6: 1.154002

[epoch:   5/100, batch:   430/  792, ite: 39479] train loss: 5.841532, tar: 0.629002 
l0: 2.762496, l1: 2.688767, l2: 2.688538, l3: 2.724302, l4: 3.013302, l5: 2.948778, l6: 2.546823

[epoch:   5/100, batch:   432/  792, ite: 39480] train loss: 5.852185, tar: 0.630443 
l0: 0.567894, l1: 0.566066, l2: 0.567225, l3: 0.579201, l4: 0.602721, l5: 0.651879, l6: 1.048181

[epoch:   5/100, batch:   434/  792, ite: 39481] train loss: 5.852042, tar: 0.63

[epoch:   5/100, batch:   514/  792, ite: 39521] train loss: 5.847271, tar: 0.629804 
l0: 0.637828, l1: 0.645901, l2: 0.646812, l3: 0.654984, l4: 0.674635, l5: 0.685735, l6: 0.927390

[epoch:   5/100, batch:   516/  792, ite: 39522] train loss: 5.847376, tar: 0.629809 
l0: 1.366427, l1: 1.369105, l2: 1.366441, l3: 1.389855, l4: 1.432652, l5: 1.555904, l6: 1.382364

[epoch:   5/100, batch:   518/  792, ite: 39523] train loss: 5.850933, tar: 0.630293 
l0: 0.733504, l1: 0.729162, l2: 0.723227, l3: 0.725997, l4: 0.719228, l5: 0.769107, l6: 1.008289

[epoch:   5/100, batch:   520/  792, ite: 39524] train loss: 5.851394, tar: 0.630360 
l0: 0.506655, l1: 0.511864, l2: 0.514633, l3: 0.513827, l4: 0.530765, l5: 0.723920, l6: 0.902529

[epoch:   5/100, batch:   522/  792, ite: 39525] train loss: 5.850925, tar: 0.630279 
l0: 0.532995, l1: 0.533304, l2: 0.534182, l3: 0.550664, l4: 0.602352, l5: 0.735269, l6: 0.847380

[epoch:   5/100, batch:   524/  792, ite: 39526] train loss: 5.850547, tar: 0.63

[epoch:   5/100, batch:   604/  792, ite: 39566] train loss: 5.847301, tar: 0.630034 
l0: 0.431556, l1: 0.425414, l2: 0.422538, l3: 0.423234, l4: 0.445637, l5: 0.581680, l6: 0.766812

[epoch:   5/100, batch:   606/  792, ite: 39567] train loss: 5.846322, tar: 0.629907 
l0: 0.536055, l1: 0.536527, l2: 0.539683, l3: 0.556432, l4: 0.575691, l5: 0.618872, l6: 0.808347

[epoch:   5/100, batch:   608/  792, ite: 39568] train loss: 5.845794, tar: 0.629847 
l0: 0.771203, l1: 0.777858, l2: 0.784605, l3: 0.799035, l4: 0.877755, l5: 0.971568, l6: 1.200129

[epoch:   5/100, batch:   610/  792, ite: 39569] train loss: 5.846773, tar: 0.629937 
l0: 1.065092, l1: 1.074473, l2: 1.077875, l3: 1.069929, l4: 1.093940, l5: 1.140092, l6: 1.205829

[epoch:   5/100, batch:   612/  792, ite: 39570] train loss: 5.848758, tar: 0.630215 
l0: 0.856239, l1: 0.853316, l2: 0.854672, l3: 0.853574, l4: 0.903214, l5: 0.929691, l6: 0.982672

[epoch:   5/100, batch:   614/  792, ite: 39571] train loss: 5.849674, tar: 0.63

[epoch:   5/100, batch:   694/  792, ite: 39611] train loss: 5.842505, tar: 0.629389 
l0: 0.639919, l1: 0.650064, l2: 0.652888, l3: 0.663272, l4: 0.663306, l5: 0.695262, l6: 0.873103

[epoch:   5/100, batch:   696/  792, ite: 39612] train loss: 5.842433, tar: 0.629395 
l0: 0.393607, l1: 0.394146, l2: 0.400198, l3: 0.407016, l4: 0.452923, l5: 0.662744, l6: 0.877620

[epoch:   5/100, batch:   698/  792, ite: 39613] train loss: 5.841639, tar: 0.629249 
l0: 0.632725, l1: 0.640696, l2: 0.651857, l3: 0.693838, l4: 0.734387, l5: 0.845765, l6: 1.063892

[epoch:   5/100, batch:   700/  792, ite: 39614] train loss: 5.842056, tar: 0.629251 
l0: 0.494086, l1: 0.494670, l2: 0.487771, l3: 0.495610, l4: 0.520681, l5: 0.610765, l6: 0.673558

[epoch:   5/100, batch:   702/  792, ite: 39615] train loss: 5.841250, tar: 0.629168 
l0: 0.670446, l1: 0.668156, l2: 0.669807, l3: 0.676519, l4: 0.681133, l5: 0.813322, l6: 0.879897

[epoch:   5/100, batch:   704/  792, ite: 39616] train loss: 5.841369, tar: 0.62

[epoch:   5/100, batch:   784/  792, ite: 39656] train loss: 5.841203, tar: 0.629030 
l0: 1.218529, l1: 1.215789, l2: 1.220206, l3: 1.221660, l4: 1.237281, l5: 1.261655, l6: 1.399360

[epoch:   5/100, batch:   786/  792, ite: 39657] train loss: 5.843746, tar: 0.629386 
l0: 0.771405, l1: 0.786681, l2: 0.785280, l3: 0.788552, l4: 0.783905, l5: 0.906653, l6: 1.084393

[epoch:   5/100, batch:   788/  792, ite: 39658] train loss: 5.844460, tar: 0.629471 
l0: 0.672021, l1: 0.675575, l2: 0.679145, l3: 0.670853, l4: 0.689865, l5: 0.860772, l6: 0.976442

[epoch:   5/100, batch:   790/  792, ite: 39659] train loss: 5.844677, tar: 0.629497 
l0: 0.462311, l1: 0.463580, l2: 0.465192, l3: 0.472999, l4: 0.496483, l5: 0.610787, l6: 0.751956

[epoch:   5/100, batch:   792/  792, ite: 39660] train loss: 5.843844, tar: 0.629396 
Starting epoch 6
Epoch 6 loading complete
l0: 0.583212, l1: 0.582435, l2: 0.584993, l3: 0.585070, l4: 0.583687, l5: 0.702159, l6: 0.990865

[epoch:   6/100, batch:     2/  792, i

l0: 0.471548, l1: 0.477670, l2: 0.482369, l3: 0.496463, l4: 0.534708, l5: 0.629687, l6: 0.747226

[epoch:   6/100, batch:    82/  792, ite: 39701] train loss: 5.838093, tar: 0.628691 
l0: 1.029223, l1: 1.033122, l2: 1.032441, l3: 1.032757, l4: 1.077082, l5: 1.086882, l6: 1.200971

[epoch:   6/100, batch:    84/  792, ite: 39702] train loss: 5.839861, tar: 0.628926 
l0: 0.333582, l1: 0.331888, l2: 0.330572, l3: 0.335829, l4: 0.357047, l5: 0.419551, l6: 0.561445

[epoch:   6/100, batch:    86/  792, ite: 39703] train loss: 5.838354, tar: 0.628753 
l0: 0.316022, l1: 0.318637, l2: 0.317086, l3: 0.327262, l4: 0.354776, l5: 0.388336, l6: 0.463780

[epoch:   6/100, batch:    88/  792, ite: 39704] train loss: 5.836746, tar: 0.628569 
l0: 0.788303, l1: 0.791333, l2: 0.788628, l3: 0.792378, l4: 0.807835, l5: 0.924872, l6: 1.565022

[epoch:   6/100, batch:    90/  792, ite: 39705] train loss: 5.838054, tar: 0.628663 
l0: 0.389110, l1: 0.400298, l2: 0.400986, l3: 0.396100, l4: 0.446280, l5: 0.4917

[epoch:   6/100, batch:   170/  792, ite: 39745] train loss: 5.831354, tar: 0.627829 
l0: 0.750612, l1: 0.753919, l2: 0.750156, l3: 0.753684, l4: 0.788818, l5: 0.834952, l6: 0.860672

[epoch:   6/100, batch:   172/  792, ite: 39746] train loss: 5.831728, tar: 0.627899 
l0: 0.360549, l1: 0.363551, l2: 0.364700, l3: 0.370377, l4: 0.414417, l5: 0.590755, l6: 0.793830

[epoch:   6/100, batch:   174/  792, ite: 39747] train loss: 5.830737, tar: 0.627746 
l0: 1.025761, l1: 1.032205, l2: 1.050086, l3: 1.106829, l4: 1.186489, l5: 1.201942, l6: 1.451501

[epoch:   6/100, batch:   176/  792, ite: 39748] train loss: 5.832936, tar: 0.627974 
l0: 0.234571, l1: 0.236040, l2: 0.236560, l3: 0.244905, l4: 0.282022, l5: 0.345073, l6: 0.463564

[epoch:   6/100, batch:   178/  792, ite: 39749] train loss: 5.831068, tar: 0.627749 
l0: 0.680250, l1: 0.689849, l2: 0.692615, l3: 0.703436, l4: 0.758066, l5: 0.811128, l6: 1.052300

[epoch:   6/100, batch:   180/  792, ite: 39750] train loss: 5.831509, tar: 0.62

[epoch:   6/100, batch:   260/  792, ite: 39790] train loss: 5.825379, tar: 0.627096 
l0: 0.363084, l1: 0.378980, l2: 0.387321, l3: 0.418414, l4: 0.473189, l5: 0.537779, l6: 0.574245

[epoch:   6/100, batch:   262/  792, ite: 39791] train loss: 5.824208, tar: 0.626949 
l0: 0.848503, l1: 0.863300, l2: 0.857659, l3: 0.877253, l4: 0.889419, l5: 1.059189, l6: 1.367594

[epoch:   6/100, batch:   264/  792, ite: 39792] train loss: 5.825534, tar: 0.627072 
l0: 0.385390, l1: 0.387570, l2: 0.390093, l3: 0.404404, l4: 0.429989, l5: 0.532407, l6: 0.684438

[epoch:   6/100, batch:   266/  792, ite: 39793] train loss: 5.824475, tar: 0.626938 
l0: 0.893227, l1: 0.892685, l2: 0.891690, l3: 0.869115, l4: 0.858141, l5: 0.861328, l6: 0.963994

[epoch:   6/100, batch:   268/  792, ite: 39794] train loss: 5.825334, tar: 0.627086 
l0: 0.565638, l1: 0.573708, l2: 0.573089, l3: 0.571316, l4: 0.592602, l5: 0.613643, l6: 0.719603

[epoch:   6/100, batch:   270/  792, ite: 39795] train loss: 5.824890, tar: 0.62

[epoch:   6/100, batch:   350/  792, ite: 39835] train loss: 5.823442, tar: 0.626561 
l0: 0.738734, l1: 0.749663, l2: 0.751554, l3: 0.763804, l4: 0.794735, l5: 0.915430, l6: 1.184303

[epoch:   6/100, batch:   352/  792, ite: 39836] train loss: 5.824134, tar: 0.626622 
l0: 0.831519, l1: 0.832937, l2: 0.833068, l3: 0.840834, l4: 0.888111, l5: 0.998187, l6: 1.692799

[epoch:   6/100, batch:   354/  792, ite: 39837] train loss: 5.825682, tar: 0.626734 
l0: 0.331246, l1: 0.334051, l2: 0.331393, l3: 0.341702, l4: 0.375066, l5: 0.501465, l6: 0.544791

[epoch:   6/100, batch:   356/  792, ite: 39838] train loss: 5.824402, tar: 0.626573 
l0: 0.455379, l1: 0.456056, l2: 0.456344, l3: 0.458938, l4: 0.478793, l5: 0.517161, l6: 0.710614

[epoch:   6/100, batch:   358/  792, ite: 39839] train loss: 5.823586, tar: 0.626480 
l0: 0.462859, l1: 0.463650, l2: 0.462312, l3: 0.463949, l4: 0.503577, l5: 0.594681, l6: 0.782016

[epoch:   6/100, batch:   360/  792, ite: 39840] train loss: 5.822878, tar: 0.62

[epoch:   6/100, batch:   440/  792, ite: 39880] train loss: 5.823855, tar: 0.626502 
l0: 0.431925, l1: 0.438502, l2: 0.433921, l3: 0.452935, l4: 0.468574, l5: 0.610279, l6: 0.773375

[epoch:   6/100, batch:   442/  792, ite: 39881] train loss: 5.823119, tar: 0.626399 
l0: 0.645572, l1: 0.647576, l2: 0.650438, l3: 0.656541, l4: 0.702333, l5: 0.754631, l6: 0.986407

[epoch:   6/100, batch:   444/  792, ite: 39882] train loss: 5.823245, tar: 0.626409 
l0: 0.494864, l1: 0.494216, l2: 0.494018, l3: 0.499454, l4: 0.515972, l5: 0.646908, l6: 0.803050

[epoch:   6/100, batch:   446/  792, ite: 39883] train loss: 5.822688, tar: 0.626339 
l0: 0.715890, l1: 0.711006, l2: 0.706636, l3: 0.704505, l4: 0.748915, l5: 0.901729, l6: 1.099396

[epoch:   6/100, batch:   448/  792, ite: 39884] train loss: 5.823214, tar: 0.626387 
l0: 0.616044, l1: 0.625252, l2: 0.623724, l3: 0.638474, l4: 0.643574, l5: 0.670322, l6: 0.799919

[epoch:   6/100, batch:   450/  792, ite: 39885] train loss: 5.823030, tar: 0.62

[epoch:   6/100, batch:   530/  792, ite: 39925] train loss: 5.819074, tar: 0.625910 
l0: 0.433267, l1: 0.431649, l2: 0.430160, l3: 0.443233, l4: 0.469512, l5: 0.525054, l6: 0.622593

[epoch:   6/100, batch:   532/  792, ite: 39926] train loss: 5.818166, tar: 0.625810 
l0: 0.568399, l1: 0.568820, l2: 0.568812, l3: 0.572428, l4: 0.625976, l5: 0.785416, l6: 0.928484

[epoch:   6/100, batch:   534/  792, ite: 39927] train loss: 5.818016, tar: 0.625780 
l0: 0.580258, l1: 0.588021, l2: 0.586881, l3: 0.590344, l4: 0.586499, l5: 0.667454, l6: 0.928689

[epoch:   6/100, batch:   536/  792, ite: 39928] train loss: 5.817838, tar: 0.625756 
l0: 0.769711, l1: 0.769592, l2: 0.769637, l3: 0.778861, l4: 0.819367, l5: 0.918456, l6: 1.174864

[epoch:   6/100, batch:   538/  792, ite: 39929] train loss: 5.818544, tar: 0.625831 
l0: 0.663239, l1: 0.672440, l2: 0.684467, l3: 0.674684, l4: 0.676672, l5: 0.857638, l6: 1.013881

[epoch:   6/100, batch:   540/  792, ite: 39930] train loss: 5.818771, tar: 0.62

[epoch:   6/100, batch:   620/  792, ite: 39970] train loss: 5.821408, tar: 0.626281 
l0: 0.704875, l1: 0.709555, l2: 0.723872, l3: 0.731812, l4: 0.768139, l5: 0.851978, l6: 0.842891

[epoch:   6/100, batch:   622/  792, ite: 39971] train loss: 5.821631, tar: 0.626321 
l0: 0.643314, l1: 0.643333, l2: 0.642798, l3: 0.631026, l4: 0.638868, l5: 0.793017, l6: 0.858781

[epoch:   6/100, batch:   624/  792, ite: 39972] train loss: 5.821609, tar: 0.626330 
l0: 0.404230, l1: 0.403480, l2: 0.401312, l3: 0.403437, l4: 0.423844, l5: 0.549118, l6: 0.740281

[epoch:   6/100, batch:   626/  792, ite: 39973] train loss: 5.820764, tar: 0.626217 
l0: 0.429516, l1: 0.429821, l2: 0.430599, l3: 0.436040, l4: 0.477873, l5: 0.538431, l6: 0.741298

[epoch:   6/100, batch:   628/  792, ite: 39974] train loss: 5.819952, tar: 0.626118 
l0: 0.545995, l1: 0.550679, l2: 0.542863, l3: 0.554586, l4: 0.581931, l5: 0.694605, l6: 0.889119

[epoch:   6/100, batch:   630/  792, ite: 39975] train loss: 5.819679, tar: 0.62

[epoch:   6/100, batch:   710/  792, ite: 40015] train loss: 7.225005, tar: 0.780093 
l0: 0.739375, l1: 0.729228, l2: 0.722520, l3: 0.731469, l4: 0.800200, l5: 0.912861, l6: 1.056027

[epoch:   6/100, batch:   712/  792, ite: 40016] train loss: 7.192394, tar: 0.777548 
l0: 0.742563, l1: 0.750496, l2: 0.752032, l3: 0.767240, l4: 0.792243, l5: 0.900229, l6: 1.106204

[epoch:   6/100, batch:   714/  792, ite: 40017] train loss: 7.173627, tar: 0.775490 
l0: 0.332842, l1: 0.340539, l2: 0.342481, l3: 0.354132, l4: 0.374829, l5: 0.495183, l6: 0.539918

[epoch:   6/100, batch:   716/  792, ite: 40018] train loss: 6.963286, tar: 0.750899 
l0: 0.898874, l1: 0.880352, l2: 0.878461, l3: 0.877686, l4: 0.896373, l5: 0.882180, l6: 0.986277

[epoch:   6/100, batch:   718/  792, ite: 40019] train loss: 6.983191, tar: 0.758687 
l0: 0.211796, l1: 0.218000, l2: 0.218232, l3: 0.228550, l4: 0.268910, l5: 0.315389, l6: 0.421564

[epoch:   6/100, batch:   720/  792, ite: 40020] train loss: 6.753987, tar: 0.73

l0: 0.503811, l1: 0.509639, l2: 0.501113, l3: 0.500087, l4: 0.557617, l5: 0.644873, l6: 0.754005

[epoch:   7/100, batch:     8/  792, ite: 40060] train loss: 5.976422, tar: 0.647319 
l0: 0.542154, l1: 0.557438, l2: 0.558871, l3: 0.564878, l4: 0.583195, l5: 0.652398, l6: 0.777189

[epoch:   7/100, batch:    10/  792, ite: 40061] train loss: 5.961270, tar: 0.645595 
l0: 0.598759, l1: 0.600299, l2: 0.601880, l3: 0.605911, l4: 0.589413, l5: 0.662763, l6: 0.863227

[epoch:   7/100, batch:    12/  792, ite: 40062] train loss: 5.952653, tar: 0.644840 
l0: 0.543713, l1: 0.543554, l2: 0.543354, l3: 0.553661, l4: 0.569777, l5: 0.647213, l6: 0.859771

[epoch:   7/100, batch:    14/  792, ite: 40063] train loss: 5.940338, tar: 0.643234 
l0: 0.712587, l1: 0.718607, l2: 0.723121, l3: 0.727473, l4: 0.736429, l5: 0.900273, l6: 1.076219

[epoch:   7/100, batch:    16/  792, ite: 40064] train loss: 5.954502, tar: 0.644318 
l0: 0.798810, l1: 0.797250, l2: 0.798597, l3: 0.809150, l4: 0.850837, l5: 0.9912

[epoch:   7/100, batch:    96/  792, ite: 40104] train loss: 5.809465, tar: 0.627319 
l0: 0.680947, l1: 0.677845, l2: 0.677153, l3: 0.679097, l4: 0.707352, l5: 0.807985, l6: 0.959378

[epoch:   7/100, batch:    98/  792, ite: 40105] train loss: 5.813890, tar: 0.627830 
l0: 0.778561, l1: 0.784043, l2: 0.762673, l3: 0.761348, l4: 0.747637, l5: 0.711423, l6: 0.975613

[epoch:   7/100, batch:   100/  792, ite: 40106] train loss: 5.821371, tar: 0.629252 
l0: 0.425190, l1: 0.425690, l2: 0.426428, l3: 0.424272, l4: 0.459472, l5: 0.560371, l6: 0.753424

[epoch:   7/100, batch:   102/  792, ite: 40107] train loss: 5.806686, tar: 0.627345 
l0: 0.936692, l1: 0.940368, l2: 0.943104, l3: 0.949932, l4: 0.969048, l5: 1.120077, l6: 1.186642

[epoch:   7/100, batch:   104/  792, ite: 40108] train loss: 5.830559, tar: 0.630209 
l0: 0.649462, l1: 0.654681, l2: 0.647981, l3: 0.657042, l4: 0.681369, l5: 0.711246, l6: 0.893262

[epoch:   7/100, batch:   106/  792, ite: 40109] train loss: 5.830497, tar: 0.63

[epoch:   7/100, batch:   186/  792, ite: 40149] train loss: 5.806604, tar: 0.626981 
l0: 0.810551, l1: 0.823863, l2: 0.824643, l3: 0.821255, l4: 0.837122, l5: 0.885428, l6: 1.088719

[epoch:   7/100, batch:   188/  792, ite: 40150] train loss: 5.817041, tar: 0.628205 
l0: 0.552748, l1: 0.567387, l2: 0.569961, l3: 0.580795, l4: 0.614206, l5: 0.820629, l6: 0.857852

[epoch:   7/100, batch:   190/  792, ite: 40151] train loss: 5.814777, tar: 0.627705 
l0: 0.606902, l1: 0.602855, l2: 0.603577, l3: 0.612338, l4: 0.656389, l5: 0.652069, l6: 0.771318

[epoch:   7/100, batch:   192/  792, ite: 40152] train loss: 5.812997, tar: 0.627569 
l0: 0.602013, l1: 0.604742, l2: 0.610290, l3: 0.614575, l4: 0.655590, l5: 0.767591, l6: 0.817057

[epoch:   7/100, batch:   194/  792, ite: 40153] train loss: 5.811005, tar: 0.627401 
l0: 1.015896, l1: 1.039165, l2: 1.019659, l3: 1.031186, l4: 1.072052, l5: 1.175150, l6: 1.626853

[epoch:   7/100, batch:   196/  792, ite: 40154] train loss: 5.836113, tar: 0.62

[epoch:   7/100, batch:   276/  792, ite: 40194] train loss: 5.812898, tar: 0.626787 
l0: 0.423804, l1: 0.426221, l2: 0.424659, l3: 0.424616, l4: 0.451075, l5: 0.590593, l6: 0.909234

[epoch:   7/100, batch:   278/  792, ite: 40195] train loss: 5.807228, tar: 0.625746 
l0: 0.488952, l1: 0.486816, l2: 0.488013, l3: 0.486928, l4: 0.512429, l5: 0.574991, l6: 0.714933

[epoch:   7/100, batch:   280/  792, ite: 40196] train loss: 5.800294, tar: 0.625048 
l0: 1.079535, l1: 1.081483, l2: 1.086103, l3: 1.095911, l4: 1.104480, l5: 1.185363, l6: 1.404188

[epoch:   7/100, batch:   282/  792, ite: 40197] train loss: 5.819413, tar: 0.627355 
l0: 1.176076, l1: 1.193372, l2: 1.201600, l3: 1.238629, l4: 1.241338, l5: 1.154828, l6: 1.558713

[epoch:   7/100, batch:   284/  792, ite: 40198] train loss: 5.842765, tar: 0.630126 
l0: 0.519677, l1: 0.525254, l2: 0.521967, l3: 0.523594, l4: 0.565011, l5: 0.637562, l6: 0.992937

[epoch:   7/100, batch:   286/  792, ite: 40199] train loss: 5.841453, tar: 0.62

[epoch:   7/100, batch:   366/  792, ite: 40239] train loss: 5.754601, tar: 0.617634 
l0: 0.434923, l1: 0.439442, l2: 0.433472, l3: 0.432799, l4: 0.454514, l5: 0.526797, l6: 0.587417

[epoch:   7/100, batch:   368/  792, ite: 40240] train loss: 5.746788, tar: 0.616872 
l0: 0.704230, l1: 0.733379, l2: 0.749367, l3: 0.764941, l4: 0.815204, l5: 0.960219, l6: 1.069542

[epoch:   7/100, batch:   370/  792, ite: 40241] train loss: 5.752095, tar: 0.617235 
l0: 0.455228, l1: 0.456789, l2: 0.458516, l3: 0.457266, l4: 0.473820, l5: 0.571891, l6: 0.710629

[epoch:   7/100, batch:   372/  792, ite: 40242] train loss: 5.746602, tar: 0.616565 
l0: 0.563542, l1: 0.571793, l2: 0.574999, l3: 0.566707, l4: 0.591676, l5: 0.723097, l6: 0.929253

[epoch:   7/100, batch:   374/  792, ite: 40243] train loss: 5.745752, tar: 0.616347 
l0: 0.587106, l1: 0.589636, l2: 0.587387, l3: 0.590866, l4: 0.624632, l5: 0.702117, l6: 1.014488

[epoch:   7/100, batch:   376/  792, ite: 40244] train loss: 5.746034, tar: 0.61

[epoch:   7/100, batch:   456/  792, ite: 40284] train loss: 5.750168, tar: 0.617801 
l0: 0.420068, l1: 0.423761, l2: 0.424499, l3: 0.424141, l4: 0.441233, l5: 0.475943, l6: 0.663801

[epoch:   7/100, batch:   458/  792, ite: 40285] train loss: 5.743933, tar: 0.617107 
l0: 0.726957, l1: 0.735831, l2: 0.744860, l3: 0.752983, l4: 0.768383, l5: 0.893736, l6: 1.075144

[epoch:   7/100, batch:   460/  792, ite: 40286] train loss: 5.747755, tar: 0.617491 
l0: 0.579815, l1: 0.581006, l2: 0.580612, l3: 0.581157, l4: 0.617400, l5: 0.711440, l6: 0.958695

[epoch:   7/100, batch:   462/  792, ite: 40287] train loss: 5.747637, tar: 0.617360 
l0: 0.263291, l1: 0.267788, l2: 0.266121, l3: 0.270656, l4: 0.307329, l5: 0.381490, l6: 0.531986

[epoch:   7/100, batch:   464/  792, ite: 40288] train loss: 5.737558, tar: 0.616131 
l0: 0.342599, l1: 0.351751, l2: 0.350940, l3: 0.367691, l4: 0.390506, l5: 0.474093, l6: 0.563259

[epoch:   7/100, batch:   466/  792, ite: 40289] train loss: 5.729650, tar: 0.61

[epoch:   7/100, batch:   546/  792, ite: 40329] train loss: 5.710031, tar: 0.613333 
l0: 0.500147, l1: 0.511742, l2: 0.508250, l3: 0.500448, l4: 0.526488, l5: 0.648025, l6: 0.743262

[epoch:   7/100, batch:   548/  792, ite: 40330] train loss: 5.707382, tar: 0.612990 
l0: 0.685304, l1: 0.689851, l2: 0.692781, l3: 0.695734, l4: 0.725433, l5: 0.797024, l6: 0.909920

[epoch:   7/100, batch:   550/  792, ite: 40331] train loss: 5.708750, tar: 0.613209 
l0: 0.922891, l1: 0.925804, l2: 0.930811, l3: 0.941069, l4: 0.962309, l5: 0.990081, l6: 1.074723

[epoch:   7/100, batch:   552/  792, ite: 40332] train loss: 5.715174, tar: 0.614141 
l0: 0.233264, l1: 0.234520, l2: 0.235133, l3: 0.243415, l4: 0.281565, l5: 0.337267, l6: 0.459316

[epoch:   7/100, batch:   554/  792, ite: 40333] train loss: 5.705478, tar: 0.612998 
l0: 0.835675, l1: 0.836357, l2: 0.838141, l3: 0.844086, l4: 0.855415, l5: 0.819141, l6: 0.970543

[epoch:   7/100, batch:   556/  792, ite: 40334] train loss: 5.709629, tar: 0.61

[epoch:   7/100, batch:   636/  792, ite: 40374] train loss: 5.747586, tar: 0.616957 
l0: 0.534211, l1: 0.539204, l2: 0.531976, l3: 0.527687, l4: 0.523841, l5: 0.692789, l6: 0.668992

[epoch:   7/100, batch:   638/  792, ite: 40375] train loss: 5.744801, tar: 0.616737 
l0: 0.627481, l1: 0.630728, l2: 0.630805, l3: 0.624491, l4: 0.660421, l5: 0.749118, l6: 0.837548

[epoch:   7/100, batch:   640/  792, ite: 40376] train loss: 5.744460, tar: 0.616765 
l0: 0.333588, l1: 0.342847, l2: 0.339351, l3: 0.355543, l4: 0.396912, l5: 0.531866, l6: 0.778814

[epoch:   7/100, batch:   642/  792, ite: 40377] train loss: 5.739329, tar: 0.616014 
l0: 0.513456, l1: 0.508271, l2: 0.504623, l3: 0.504709, l4: 0.521919, l5: 0.548095, l6: 0.766928

[epoch:   7/100, batch:   644/  792, ite: 40378] train loss: 5.736279, tar: 0.615743 
l0: 0.591385, l1: 0.602175, l2: 0.599596, l3: 0.595011, l4: 0.626653, l5: 0.689683, l6: 0.868578

[epoch:   7/100, batch:   646/  792, ite: 40379] train loss: 5.735949, tar: 0.61

[epoch:   7/100, batch:   726/  792, ite: 40419] train loss: 5.786524, tar: 0.622973 
l0: 1.536868, l1: 1.552155, l2: 1.521402, l3: 1.531205, l4: 1.564315, l5: 1.555523, l6: 2.187241

[epoch:   7/100, batch:   728/  792, ite: 40420] train loss: 5.805804, tar: 0.625149 
l0: 0.412543, l1: 0.416588, l2: 0.417321, l3: 0.423285, l4: 0.467436, l5: 0.612728, l6: 0.727905

[epoch:   7/100, batch:   730/  792, ite: 40421] train loss: 5.802149, tar: 0.624644 
l0: 0.374296, l1: 0.377637, l2: 0.375225, l3: 0.382113, l4: 0.396189, l5: 0.502405, l6: 0.620448

[epoch:   7/100, batch:   732/  792, ite: 40422] train loss: 5.797230, tar: 0.624050 
l0: 0.736515, l1: 0.738577, l2: 0.742447, l3: 0.734639, l4: 0.757335, l5: 0.852037, l6: 0.808524

[epoch:   7/100, batch:   734/  792, ite: 40423] train loss: 5.797905, tar: 0.624316 
l0: 0.386728, l1: 0.393248, l2: 0.396637, l3: 0.405864, l4: 0.430746, l5: 0.629299, l6: 0.620760

[epoch:   7/100, batch:   736/  792, ite: 40424] train loss: 5.793557, tar: 0.62

l0: 0.842689, l1: 0.847070, l2: 0.847182, l3: 0.858724, l4: 0.909496, l5: 1.040327, l6: 1.385346

[epoch:   8/100, batch:    24/  792, ite: 40464] train loss: 5.806518, tar: 0.624511 
l0: 0.405113, l1: 0.411206, l2: 0.410776, l3: 0.412065, l4: 0.426923, l5: 0.476966, l6: 0.557137

[epoch:   8/100, batch:    26/  792, ite: 40465] train loss: 5.802094, tar: 0.624039 
l0: 0.538938, l1: 0.544158, l2: 0.550025, l3: 0.557606, l4: 0.611347, l5: 0.670829, l6: 0.884266

[epoch:   8/100, batch:    28/  792, ite: 40466] train loss: 5.800983, tar: 0.623856 
l0: 0.859795, l1: 0.846352, l2: 0.845433, l3: 0.843819, l4: 0.851011, l5: 0.887277, l6: 0.993251

[epoch:   8/100, batch:    30/  792, ite: 40467] train loss: 5.803969, tar: 0.624361 
l0: 0.703849, l1: 0.707797, l2: 0.707966, l3: 0.706028, l4: 0.717979, l5: 0.783023, l6: 0.869799

[epoch:   8/100, batch:    32/  792, ite: 40468] train loss: 5.804491, tar: 0.624531 
l0: 0.491170, l1: 0.497196, l2: 0.495548, l3: 0.501530, l4: 0.532306, l5: 0.6983

[epoch:   8/100, batch:   112/  792, ite: 40508] train loss: 5.797053, tar: 0.623856 
l0: 0.366338, l1: 0.364947, l2: 0.368105, l3: 0.381315, l4: 0.402892, l5: 0.467687, l6: 0.601610

[epoch:   8/100, batch:   114/  792, ite: 40509] train loss: 5.792741, tar: 0.623350 
l0: 0.613841, l1: 0.615838, l2: 0.613502, l3: 0.616100, l4: 0.639736, l5: 0.784667, l6: 1.021298

[epoch:   8/100, batch:   116/  792, ite: 40510] train loss: 5.793036, tar: 0.623332 
l0: 0.850325, l1: 0.854357, l2: 0.850815, l3: 0.846298, l4: 0.863944, l5: 0.891601, l6: 1.434284

[epoch:   8/100, batch:   118/  792, ite: 40511] train loss: 5.797830, tar: 0.623776 
l0: 0.679146, l1: 0.687682, l2: 0.697519, l3: 0.690813, l4: 0.718576, l5: 0.730638, l6: 0.853122

[epoch:   8/100, batch:   120/  792, ite: 40512] train loss: 5.798354, tar: 0.623884 
l0: 0.865310, l1: 0.878332, l2: 0.875956, l3: 0.872269, l4: 0.898021, l5: 0.953645, l6: 1.039443

[epoch:   8/100, batch:   122/  792, ite: 40513] train loss: 5.801719, tar: 0.62

[epoch:   8/100, batch:   202/  792, ite: 40553] train loss: 5.788214, tar: 0.623839 
l0: 0.437135, l1: 0.441274, l2: 0.437955, l3: 0.427638, l4: 0.517537, l5: 0.612242, l6: 0.917005

[epoch:   8/100, batch:   204/  792, ite: 40554] train loss: 5.786335, tar: 0.623502 
l0: 0.740892, l1: 0.740281, l2: 0.742871, l3: 0.758475, l4: 0.806944, l5: 0.967193, l6: 1.469139

[epoch:   8/100, batch:   206/  792, ite: 40555] train loss: 5.789493, tar: 0.623714 
l0: 0.732816, l1: 0.742182, l2: 0.749043, l3: 0.761210, l4: 0.766316, l5: 0.856945, l6: 1.089420

[epoch:   8/100, batch:   208/  792, ite: 40556] train loss: 5.791251, tar: 0.623910 
l0: 0.633231, l1: 0.632386, l2: 0.638144, l3: 0.637435, l4: 0.691572, l5: 0.844959, l6: 1.065832

[epoch:   8/100, batch:   210/  792, ite: 40557] train loss: 5.792023, tar: 0.623927 
l0: 0.348817, l1: 0.350067, l2: 0.352518, l3: 0.368444, l4: 0.399169, l5: 0.501445, l6: 0.531116

[epoch:   8/100, batch:   212/  792, ite: 40558] train loss: 5.787837, tar: 0.62

[epoch:   8/100, batch:   292/  792, ite: 40598] train loss: 5.756088, tar: 0.620140 
l0: 0.556592, l1: 0.556890, l2: 0.558132, l3: 0.551458, l4: 0.582463, l5: 0.671035, l6: 0.913436

[epoch:   8/100, batch:   294/  792, ite: 40599] train loss: 5.755443, tar: 0.620034 
l0: 0.416483, l1: 0.417481, l2: 0.420391, l3: 0.428354, l4: 0.466163, l5: 0.556543, l6: 0.703219

[epoch:   8/100, batch:   296/  792, ite: 40600] train loss: 5.752879, tar: 0.619695 
l0: 0.458551, l1: 0.461535, l2: 0.465739, l3: 0.472212, l4: 0.486319, l5: 0.534218, l6: 0.583729

[epoch:   8/100, batch:   298/  792, ite: 40601] train loss: 5.750176, tar: 0.619427 
l0: 0.484980, l1: 0.484899, l2: 0.482979, l3: 0.487691, l4: 0.491866, l5: 0.607691, l6: 0.852178

[epoch:   8/100, batch:   300/  792, ite: 40602] train loss: 5.748612, tar: 0.619204 
l0: 0.488179, l1: 0.493328, l2: 0.488796, l3: 0.488064, l4: 0.489067, l5: 0.571098, l6: 0.704599

[epoch:   8/100, batch:   302/  792, ite: 40603] train loss: 5.746627, tar: 0.61

[epoch:   8/100, batch:   382/  792, ite: 40643] train loss: 5.771873, tar: 0.621602 
l0: 0.309737, l1: 0.310609, l2: 0.308869, l3: 0.308021, l4: 0.335427, l5: 0.397114, l6: 0.446008

[epoch:   8/100, batch:   384/  792, ite: 40644] train loss: 5.767441, tar: 0.621118 
l0: 0.309324, l1: 0.316925, l2: 0.313024, l3: 0.308563, l4: 0.320844, l5: 0.479873, l6: 0.535440

[epoch:   8/100, batch:   386/  792, ite: 40645] train loss: 5.763383, tar: 0.620634 
l0: 0.713418, l1: 0.710324, l2: 0.710937, l3: 0.713131, l4: 0.749263, l5: 0.830742, l6: 0.945296

[epoch:   8/100, batch:   388/  792, ite: 40646] train loss: 5.764345, tar: 0.620778 
l0: 0.543503, l1: 0.547050, l2: 0.543320, l3: 0.533520, l4: 0.538839, l5: 0.586788, l6: 0.719001

[epoch:   8/100, batch:   390/  792, ite: 40647] train loss: 5.762652, tar: 0.620659 
l0: 0.936845, l1: 0.941739, l2: 0.937141, l3: 0.943843, l4: 0.981191, l5: 1.027239, l6: 1.220471

[epoch:   8/100, batch:   392/  792, ite: 40648] train loss: 5.766638, tar: 0.62

[epoch:   8/100, batch:   472/  792, ite: 40688] train loss: 5.757320, tar: 0.619356 
l0: 0.394520, l1: 0.395077, l2: 0.392165, l3: 0.395377, l4: 0.429414, l5: 0.526439, l6: 0.549052

[epoch:   8/100, batch:   474/  792, ite: 40689] train loss: 5.754558, tar: 0.619030 
l0: 0.696911, l1: 0.717272, l2: 0.716722, l3: 0.734375, l4: 0.771911, l5: 0.955944, l6: 1.084075

[epoch:   8/100, batch:   476/  792, ite: 40690] train loss: 5.757067, tar: 0.619143 
l0: 0.405039, l1: 0.414823, l2: 0.413254, l3: 0.424280, l4: 0.471415, l5: 0.635827, l6: 0.773652

[epoch:   8/100, batch:   478/  792, ite: 40691] train loss: 5.755085, tar: 0.618833 
l0: 0.475657, l1: 0.471687, l2: 0.473510, l3: 0.463793, l4: 0.481305, l5: 0.633646, l6: 0.714354

[epoch:   8/100, batch:   480/  792, ite: 40692] train loss: 5.753279, tar: 0.618626 
l0: 0.448611, l1: 0.456378, l2: 0.456135, l3: 0.466413, l4: 0.513310, l5: 0.645881, l6: 0.688017

[epoch:   8/100, batch:   482/  792, ite: 40693] train loss: 5.751390, tar: 0.61

[epoch:   8/100, batch:   562/  792, ite: 40733] train loss: 5.738068, tar: 0.616834 
l0: 0.584575, l1: 0.585594, l2: 0.587304, l3: 0.591927, l4: 0.615857, l5: 0.632588, l6: 0.687798

[epoch:   8/100, batch:   564/  792, ite: 40734] train loss: 5.737132, tar: 0.616790 
l0: 0.378887, l1: 0.373513, l2: 0.374867, l3: 0.387727, l4: 0.406979, l5: 0.516488, l6: 0.558813

[epoch:   8/100, batch:   566/  792, ite: 40735] train loss: 5.734422, tar: 0.616466 
l0: 0.531528, l1: 0.531632, l2: 0.532544, l3: 0.539920, l4: 0.552512, l5: 0.659881, l6: 0.853352

[epoch:   8/100, batch:   568/  792, ite: 40736] train loss: 5.733679, tar: 0.616351 
l0: 0.733340, l1: 0.732749, l2: 0.735716, l3: 0.734856, l4: 0.780729, l5: 0.899024, l6: 1.487866

[epoch:   8/100, batch:   570/  792, ite: 40737] train loss: 5.736505, tar: 0.616509 
l0: 0.790984, l1: 0.800121, l2: 0.804637, l3: 0.802990, l4: 0.846320, l5: 0.873604, l6: 1.514389

[epoch:   8/100, batch:   572/  792, ite: 40738] train loss: 5.740110, tar: 0.61

[epoch:   8/100, batch:   652/  792, ite: 40778] train loss: 5.754421, tar: 0.617725 
l0: 0.625548, l1: 0.630804, l2: 0.630354, l3: 0.632112, l4: 0.651944, l5: 0.652661, l6: 0.743414

[epoch:   8/100, batch:   654/  792, ite: 40779] train loss: 5.753954, tar: 0.617735 
l0: 0.903675, l1: 0.889729, l2: 0.893167, l3: 0.898923, l4: 0.951241, l5: 0.928647, l6: 0.948252

[epoch:   8/100, batch:   656/  792, ite: 40780] train loss: 5.756164, tar: 0.618102 
l0: 0.501993, l1: 0.508020, l2: 0.509632, l3: 0.514746, l4: 0.543686, l5: 0.656965, l6: 0.790402

[epoch:   8/100, batch:   658/  792, ite: 40781] train loss: 5.755084, tar: 0.617953 
l0: 0.386554, l1: 0.385149, l2: 0.382299, l3: 0.378856, l4: 0.413457, l5: 0.496032, l6: 0.605609

[epoch:   8/100, batch:   660/  792, ite: 40782] train loss: 5.752375, tar: 0.617657 
l0: 0.448620, l1: 0.454094, l2: 0.457768, l3: 0.472042, l4: 0.510921, l5: 0.600994, l6: 0.792842

[epoch:   8/100, batch:   662/  792, ite: 40783] train loss: 5.750836, tar: 0.61

[epoch:   8/100, batch:   742/  792, ite: 40823] train loss: 5.751229, tar: 0.617697 
l0: 0.688689, l1: 0.696957, l2: 0.703994, l3: 0.697439, l4: 0.725472, l5: 0.695468, l6: 0.857345

[epoch:   8/100, batch:   744/  792, ite: 40824] train loss: 5.751668, tar: 0.617783 
l0: 0.429998, l1: 0.437920, l2: 0.436004, l3: 0.437768, l4: 0.472914, l5: 0.553766, l6: 0.599634

[epoch:   8/100, batch:   746/  792, ite: 40825] train loss: 5.749632, tar: 0.617555 
l0: 0.634835, l1: 0.638431, l2: 0.634338, l3: 0.632987, l4: 0.649834, l5: 0.754487, l6: 0.942473

[epoch:   8/100, batch:   748/  792, ite: 40826] train loss: 5.749926, tar: 0.617576 
l0: 0.714444, l1: 0.712860, l2: 0.714900, l3: 0.721180, l4: 0.728324, l5: 0.754408, l6: 0.905039

[epoch:   8/100, batch:   750/  792, ite: 40827] train loss: 5.750609, tar: 0.617693 
l0: 0.601923, l1: 0.611237, l2: 0.614620, l3: 0.626751, l4: 0.660667, l5: 0.782318, l6: 1.075131

[epoch:   8/100, batch:   752/  792, ite: 40828] train loss: 5.750975, tar: 0.61

l0: 0.687250, l1: 0.684278, l2: 0.676729, l3: 0.669466, l4: 0.697565, l5: 0.826039, l6: 0.932222

[epoch:   9/100, batch:    40/  792, ite: 40868] train loss: 5.768317, tar: 0.620121 
l0: 0.952606, l1: 0.955429, l2: 0.953754, l3: 0.946179, l4: 0.924208, l5: 0.987517, l6: 1.057618

[epoch:   9/100, batch:    42/  792, ite: 40869] train loss: 5.770673, tar: 0.620504 
l0: 0.300219, l1: 0.302593, l2: 0.308105, l3: 0.315175, l4: 0.380254, l5: 0.469910, l6: 0.681709

[epoch:   9/100, batch:    44/  792, ite: 40870] train loss: 5.768067, tar: 0.620136 
l0: 0.671171, l1: 0.672341, l2: 0.673477, l3: 0.671544, l4: 0.695700, l5: 0.851946, l6: 1.030336

[epoch:   9/100, batch:    46/  792, ite: 40871] train loss: 5.768718, tar: 0.620195 
l0: 0.808815, l1: 0.796411, l2: 0.794825, l3: 0.778928, l4: 0.810031, l5: 1.008831, l6: 1.148884

[epoch:   9/100, batch:    48/  792, ite: 40872] train loss: 5.770662, tar: 0.620411 
l0: 0.460537, l1: 0.460049, l2: 0.462552, l3: 0.465646, l4: 0.473623, l5: 0.5154

[epoch:   9/100, batch:   128/  792, ite: 40912] train loss: 5.772832, tar: 0.620489 
l0: 0.828156, l1: 0.832192, l2: 0.830289, l3: 0.844730, l4: 0.861086, l5: 0.897681, l6: 1.525206

[epoch:   9/100, batch:   130/  792, ite: 40913] train loss: 5.775662, tar: 0.620717 
l0: 0.879965, l1: 0.891483, l2: 0.887187, l3: 0.879555, l4: 0.851425, l5: 0.890011, l6: 0.899918

[epoch:   9/100, batch:   132/  792, ite: 40914] train loss: 5.777016, tar: 0.621000 
l0: 0.388012, l1: 0.389322, l2: 0.384660, l3: 0.395739, l4: 0.418989, l5: 0.510402, l6: 0.730326

[epoch:   9/100, batch:   134/  792, ite: 40915] train loss: 5.774963, tar: 0.620746 
l0: 0.659037, l1: 0.655428, l2: 0.653627, l3: 0.665992, l4: 0.714887, l5: 0.765580, l6: 0.999922

[epoch:   9/100, batch:   136/  792, ite: 40916] train loss: 5.775451, tar: 0.620787 
l0: 0.338703, l1: 0.340215, l2: 0.342833, l3: 0.352820, l4: 0.367690, l5: 0.445861, l6: 0.520832

[epoch:   9/100, batch:   138/  792, ite: 40917] train loss: 5.772734, tar: 0.62

[epoch:   9/100, batch:   218/  792, ite: 40957] train loss: 5.784065, tar: 0.622014 
l0: 0.663308, l1: 0.658486, l2: 0.650429, l3: 0.642173, l4: 0.659778, l5: 0.644054, l6: 0.745134

[epoch:   9/100, batch:   220/  792, ite: 40958] train loss: 5.783730, tar: 0.622057 
l0: 0.386414, l1: 0.392784, l2: 0.393828, l3: 0.408193, l4: 0.456546, l5: 0.569189, l6: 0.679563

[epoch:   9/100, batch:   222/  792, ite: 40959] train loss: 5.781845, tar: 0.621811 
l0: 0.614115, l1: 0.613982, l2: 0.617181, l3: 0.621417, l4: 0.649459, l5: 0.752403, l6: 1.153173

[epoch:   9/100, batch:   224/  792, ite: 40960] train loss: 5.782407, tar: 0.621803 
l0: 0.343650, l1: 0.350662, l2: 0.350186, l3: 0.359533, l4: 0.370100, l5: 0.420742, l6: 0.525641

[epoch:   9/100, batch:   226/  792, ite: 40961] train loss: 5.779804, tar: 0.621513 
l0: 0.731636, l1: 0.734165, l2: 0.705006, l3: 0.711990, l4: 0.690401, l5: 0.740466, l6: 0.927302

[epoch:   9/100, batch:   228/  792, ite: 40962] train loss: 5.780167, tar: 0.62

[epoch:   9/100, batch:   308/  792, ite: 41002] train loss: 5.769396, tar: 0.620118 
l0: 0.803075, l1: 0.803824, l2: 0.806777, l3: 0.812883, l4: 0.872748, l5: 1.017474, l6: 1.293264

[epoch:   9/100, batch:   310/  792, ite: 41003] train loss: 5.771407, tar: 0.620301 
l0: 1.122705, l1: 1.139064, l2: 1.135023, l3: 1.143811, l4: 1.146325, l5: 1.235750, l6: 1.327043

[epoch:   9/100, batch:   312/  792, ite: 41004] train loss: 5.775211, tar: 0.620801 
l0: 0.729622, l1: 0.719537, l2: 0.723144, l3: 0.719750, l4: 0.777552, l5: 0.837539, l6: 1.061612

[epoch:   9/100, batch:   314/  792, ite: 41005] train loss: 5.776246, tar: 0.620910 
l0: 0.459168, l1: 0.462842, l2: 0.465239, l3: 0.477189, l4: 0.521479, l5: 0.619242, l6: 0.717456

[epoch:   9/100, batch:   316/  792, ite: 41006] train loss: 5.775103, tar: 0.620749 
l0: 0.696920, l1: 0.704797, l2: 0.703957, l3: 0.710280, l4: 0.724790, l5: 0.743905, l6: 0.976471

[epoch:   9/100, batch:   318/  792, ite: 41007] train loss: 5.775617, tar: 0.62

[epoch:   9/100, batch:   398/  792, ite: 41047] train loss: 5.763587, tar: 0.619253 
l0: 0.615336, l1: 0.620091, l2: 0.620126, l3: 0.622356, l4: 0.650895, l5: 0.688394, l6: 0.849763

[epoch:   9/100, batch:   400/  792, ite: 41048] train loss: 5.763427, tar: 0.619249 
l0: 0.199147, l1: 0.204928, l2: 0.209107, l3: 0.216826, l4: 0.240662, l5: 0.310942, l6: 0.469428

[epoch:   9/100, batch:   402/  792, ite: 41049] train loss: 5.760202, tar: 0.618849 
l0: 0.847700, l1: 0.850275, l2: 0.860348, l3: 0.877524, l4: 0.905463, l5: 0.918270, l6: 1.269814

[epoch:   9/100, batch:   404/  792, ite: 41050] train loss: 5.762144, tar: 0.619067 
l0: 0.892206, l1: 0.879994, l2: 0.888332, l3: 0.899459, l4: 0.892112, l5: 0.886912, l6: 0.990069

[epoch:   9/100, batch:   406/  792, ite: 41051] train loss: 5.763652, tar: 0.619326 
l0: 0.499414, l1: 0.506406, l2: 0.500883, l3: 0.493999, l4: 0.530990, l5: 0.635865, l6: 0.910394

[epoch:   9/100, batch:   408/  792, ite: 41052] train loss: 5.762915, tar: 0.61

[epoch:   9/100, batch:   488/  792, ite: 41092] train loss: 5.762345, tar: 0.619120 
l0: 0.478647, l1: 0.474673, l2: 0.472875, l3: 0.468647, l4: 0.480159, l5: 0.548499, l6: 0.719248

[epoch:   9/100, batch:   490/  792, ite: 41093] train loss: 5.761154, tar: 0.618992 
l0: 0.297412, l1: 0.301529, l2: 0.304271, l3: 0.317987, l4: 0.346610, l5: 0.526971, l6: 0.731847

[epoch:   9/100, batch:   492/  792, ite: 41094] train loss: 5.759177, tar: 0.618698 
l0: 0.828864, l1: 0.833302, l2: 0.832921, l3: 0.833901, l4: 0.864199, l5: 0.989461, l6: 1.359129

[epoch:   9/100, batch:   494/  792, ite: 41095] train loss: 5.761300, tar: 0.618890 
l0: 0.617365, l1: 0.622787, l2: 0.623856, l3: 0.629017, l4: 0.682401, l5: 0.761046, l6: 0.937389

[epoch:   9/100, batch:   496/  792, ite: 41096] train loss: 5.761497, tar: 0.618888 
l0: 0.638817, l1: 0.640778, l2: 0.648091, l3: 0.659011, l4: 0.733692, l5: 0.823727, l6: 0.986899

[epoch:   9/100, batch:   498/  792, ite: 41097] train loss: 5.761902, tar: 0.61

[epoch:   9/100, batch:   578/  792, ite: 41137] train loss: 5.755260, tar: 0.618184 
l0: 0.554456, l1: 0.551316, l2: 0.552394, l3: 0.549151, l4: 0.574891, l5: 0.642630, l6: 0.846139

[epoch:   9/100, batch:   580/  792, ite: 41138] train loss: 5.754702, tar: 0.618128 
l0: 0.749836, l1: 0.751920, l2: 0.758546, l3: 0.738509, l4: 0.739486, l5: 0.784270, l6: 0.892051

[epoch:   9/100, batch:   582/  792, ite: 41139] train loss: 5.755297, tar: 0.618243 
l0: 0.906253, l1: 0.903696, l2: 0.903637, l3: 0.906898, l4: 0.938415, l5: 1.024965, l6: 1.298986

[epoch:   9/100, batch:   584/  792, ite: 41140] train loss: 5.757574, tar: 0.618496 
l0: 0.723233, l1: 0.731056, l2: 0.734758, l3: 0.757795, l4: 0.799186, l5: 0.852038, l6: 0.964183

[epoch:   9/100, batch:   586/  792, ite: 41141] train loss: 5.758409, tar: 0.618588 
l0: 0.762042, l1: 0.785797, l2: 0.788190, l3: 0.793983, l4: 0.800576, l5: 0.805992, l6: 0.936463

[epoch:   9/100, batch:   588/  792, ite: 41142] train loss: 5.759254, tar: 0.61

[epoch:   9/100, batch:   668/  792, ite: 41182] train loss: 5.757896, tar: 0.618602 
l0: 0.723727, l1: 0.727968, l2: 0.724149, l3: 0.727376, l4: 0.767474, l5: 0.885292, l6: 1.173199

[epoch:   9/100, batch:   670/  792, ite: 41183] train loss: 5.758969, tar: 0.618691 
l0: 0.516995, l1: 0.516139, l2: 0.516567, l3: 0.521139, l4: 0.567381, l5: 0.691657, l6: 0.914526

[epoch:   9/100, batch:   672/  792, ite: 41184] train loss: 5.758542, tar: 0.618605 
l0: 0.751108, l1: 0.754229, l2: 0.751635, l3: 0.750172, l4: 0.712072, l5: 0.731495, l6: 0.856953

[epoch:   9/100, batch:   674/  792, ite: 41185] train loss: 5.759069, tar: 0.618717 
l0: 0.377724, l1: 0.382448, l2: 0.385518, l3: 0.394367, l4: 0.431342, l5: 0.538532, l6: 0.678179

[epoch:   9/100, batch:   676/  792, ite: 41186] train loss: 5.757505, tar: 0.618514 
l0: 0.503403, l1: 0.504651, l2: 0.504507, l3: 0.504734, l4: 0.542119, l5: 0.633177, l6: 0.831356

[epoch:   9/100, batch:   678/  792, ite: 41187] train loss: 5.756919, tar: 0.61

[epoch:   9/100, batch:   758/  792, ite: 41227] train loss: 5.740910, tar: 0.616317 
l0: 0.415867, l1: 0.412071, l2: 0.413842, l3: 0.421425, l4: 0.409532, l5: 0.479199, l6: 0.585357

[epoch:   9/100, batch:   760/  792, ite: 41228] train loss: 5.739341, tar: 0.616154 
l0: 0.732857, l1: 0.733445, l2: 0.734489, l3: 0.745612, l4: 0.753424, l5: 0.879443, l6: 0.999499

[epoch:   9/100, batch:   762/  792, ite: 41229] train loss: 5.740073, tar: 0.616249 
l0: 0.430903, l1: 0.437019, l2: 0.441265, l3: 0.453627, l4: 0.463566, l5: 0.498069, l6: 0.622711

[epoch:   9/100, batch:   764/  792, ite: 41230] train loss: 5.738617, tar: 0.616099 
l0: 0.441060, l1: 0.446857, l2: 0.445667, l3: 0.452902, l4: 0.454784, l5: 0.583276, l6: 0.679788

[epoch:   9/100, batch:   766/  792, ite: 41231] train loss: 5.737404, tar: 0.615956 
l0: 0.884321, l1: 0.887541, l2: 0.872637, l3: 0.873014, l4: 0.913382, l5: 0.917731, l6: 0.969309

[epoch:   9/100, batch:   768/  792, ite: 41232] train loss: 5.738725, tar: 0.61

l0: 0.776034, l1: 0.777696, l2: 0.780904, l3: 0.786063, l4: 0.786790, l5: 0.850672, l6: 1.023181

[epoch:  10/100, batch:    56/  792, ite: 41272] train loss: 5.745334, tar: 0.617150 
l0: 0.361015, l1: 0.366550, l2: 0.366046, l3: 0.387213, l4: 0.408213, l5: 0.558770, l6: 0.659607

[epoch:  10/100, batch:    58/  792, ite: 41273] train loss: 5.743803, tar: 0.616949 
l0: 0.557492, l1: 0.549452, l2: 0.552253, l3: 0.565124, l4: 0.622404, l5: 0.680689, l6: 0.827374

[epoch:  10/100, batch:    60/  792, ite: 41274] train loss: 5.743374, tar: 0.616902 
l0: 0.937825, l1: 0.944141, l2: 0.944624, l3: 0.935485, l4: 0.958704, l5: 1.081555, l6: 1.192247

[epoch:  10/100, batch:    62/  792, ite: 41275] train loss: 5.745399, tar: 0.617154 
l0: 0.604469, l1: 0.613759, l2: 0.610501, l3: 0.606645, l4: 0.629010, l5: 0.700502, l6: 0.757559

[epoch:  10/100, batch:    64/  792, ite: 41276] train loss: 5.745105, tar: 0.617144 
l0: 2.507322, l1: 2.468082, l2: 2.485379, l3: 2.496088, l4: 2.534532, l5: 2.4678

[epoch:  10/100, batch:   144/  792, ite: 41316] train loss: 5.741790, tar: 0.616856 
l0: 0.543537, l1: 0.554703, l2: 0.559171, l3: 0.560920, l4: 0.647284, l5: 0.810018, l6: 0.929425

[epoch:  10/100, batch:   146/  792, ite: 41317] train loss: 5.741674, tar: 0.616800 
l0: 0.780055, l1: 0.779300, l2: 0.778610, l3: 0.779225, l4: 0.787250, l5: 0.831471, l6: 0.801021

[epoch:  10/100, batch:   148/  792, ite: 41318] train loss: 5.742299, tar: 0.616924 
l0: 0.811658, l1: 0.823010, l2: 0.831684, l3: 0.843083, l4: 0.906763, l5: 0.972035, l6: 1.156096

[epoch:  10/100, batch:   150/  792, ite: 41319] train loss: 5.743676, tar: 0.617072 
l0: 1.421426, l1: 1.433799, l2: 1.423056, l3: 1.424693, l4: 1.440820, l5: 1.479638, l6: 1.624342

[epoch:  10/100, batch:   152/  792, ite: 41320] train loss: 5.748457, tar: 0.617681 
l0: 0.990425, l1: 0.996965, l2: 0.998668, l3: 1.010106, l4: 1.048979, l5: 1.076365, l6: 1.439415

[epoch:  10/100, batch:   154/  792, ite: 41321] train loss: 5.751036, tar: 0.61

[epoch:  10/100, batch:   234/  792, ite: 41361] train loss: 5.727377, tar: 0.615110 
l0: 0.418158, l1: 0.423067, l2: 0.428577, l3: 0.441607, l4: 0.505489, l5: 0.732238, l6: 0.838873

[epoch:  10/100, batch:   236/  792, ite: 41362] train loss: 5.726608, tar: 0.614965 
l0: 1.143847, l1: 1.143940, l2: 1.141657, l3: 1.144352, l4: 1.182130, l5: 1.222734, l6: 1.462397

[epoch:  10/100, batch:   238/  792, ite: 41363] train loss: 5.729719, tar: 0.615353 
l0: 1.090152, l1: 1.092722, l2: 1.084439, l3: 1.091157, l4: 1.100622, l5: 1.148049, l6: 1.355224

[epoch:  10/100, batch:   240/  792, ite: 41364] train loss: 5.732356, tar: 0.615701 
l0: 0.406288, l1: 0.415729, l2: 0.411743, l3: 0.415935, l4: 0.422472, l5: 0.545586, l6: 0.877873

[epoch:  10/100, batch:   242/  792, ite: 41365] train loss: 5.731396, tar: 0.615548 
l0: 0.612963, l1: 0.608287, l2: 0.613291, l3: 0.622810, l4: 0.627334, l5: 0.753921, l6: 0.892112

[epoch:  10/100, batch:   244/  792, ite: 41366] train loss: 5.731352, tar: 0.61

[epoch:  10/100, batch:   324/  792, ite: 41406] train loss: 5.714968, tar: 0.613184 
l0: 0.408804, l1: 0.409860, l2: 0.409011, l3: 0.412879, l4: 0.438070, l5: 0.517049, l6: 0.762295

[epoch:  10/100, batch:   326/  792, ite: 41407] train loss: 5.713882, tar: 0.613038 
l0: 0.731088, l1: 0.730307, l2: 0.730555, l3: 0.726243, l4: 0.743163, l5: 0.795914, l6: 1.001781

[epoch:  10/100, batch:   328/  792, ite: 41408] train loss: 5.714471, tar: 0.613122 
l0: 0.635953, l1: 0.638969, l2: 0.631344, l3: 0.635036, l4: 0.664317, l5: 0.658695, l6: 0.794820

[epoch:  10/100, batch:   330/  792, ite: 41409] train loss: 5.714354, tar: 0.613138 
l0: 0.327036, l1: 0.332670, l2: 0.333867, l3: 0.333144, l4: 0.351534, l5: 0.458191, l6: 0.547074

[epoch:  10/100, batch:   332/  792, ite: 41410] train loss: 5.712608, tar: 0.612935 
l0: 0.604706, l1: 0.620191, l2: 0.620329, l3: 0.622331, l4: 0.638513, l5: 0.697528, l6: 0.729221

[epoch:  10/100, batch:   334/  792, ite: 41411] train loss: 5.712431, tar: 0.61

[epoch:  10/100, batch:   414/  792, ite: 41451] train loss: 5.713925, tar: 0.612865 
l0: 0.478962, l1: 0.477223, l2: 0.482052, l3: 0.483109, l4: 0.537493, l5: 0.646594, l6: 0.675572

[epoch:  10/100, batch:   416/  792, ite: 41452] train loss: 5.713052, tar: 0.612773 
l0: 0.469325, l1: 0.465096, l2: 0.470866, l3: 0.485463, l4: 0.511421, l5: 0.583318, l6: 0.786012

[epoch:  10/100, batch:   418/  792, ite: 41453] train loss: 5.712205, tar: 0.612674 
l0: 0.391550, l1: 0.395074, l2: 0.395547, l3: 0.420695, l4: 0.435015, l5: 0.531528, l6: 0.685417

[epoch:  10/100, batch:   420/  792, ite: 41454] train loss: 5.711052, tar: 0.612522 
l0: 0.853417, l1: 0.855218, l2: 0.856539, l3: 0.849058, l4: 0.857444, l5: 0.929126, l6: 0.991001

[epoch:  10/100, batch:   422/  792, ite: 41455] train loss: 5.712058, tar: 0.612688 
l0: 0.685089, l1: 0.695336, l2: 0.692070, l3: 0.699778, l4: 0.731277, l5: 0.785775, l6: 0.964607

[epoch:  10/100, batch:   424/  792, ite: 41456] train loss: 5.712506, tar: 0.61

[epoch:  10/100, batch:   504/  792, ite: 41496] train loss: 5.715546, tar: 0.612837 
l0: 0.671981, l1: 0.675772, l2: 0.677602, l3: 0.704690, l4: 0.723376, l5: 0.798740, l6: 0.997529

[epoch:  10/100, batch:   506/  792, ite: 41497] train loss: 5.715963, tar: 0.612877 
l0: 0.444973, l1: 0.450561, l2: 0.445060, l3: 0.442063, l4: 0.501822, l5: 0.549681, l6: 0.761883

[epoch:  10/100, batch:   508/  792, ite: 41498] train loss: 5.715010, tar: 0.612765 
l0: 1.400009, l1: 1.414041, l2: 1.399239, l3: 1.412659, l4: 1.424170, l5: 1.521992, l6: 1.621882

[epoch:  10/100, batch:   510/  792, ite: 41499] train loss: 5.718987, tar: 0.613290 
l0: 0.699365, l1: 0.702958, l2: 0.699891, l3: 0.695671, l4: 0.723643, l5: 0.788773, l6: 0.940892

[epoch:  10/100, batch:   512/  792, ite: 41500] train loss: 5.719356, tar: 0.613347 
l0: 0.476113, l1: 0.480341, l2: 0.480086, l3: 0.487742, l4: 0.510221, l5: 0.607557, l6: 0.764855

[epoch:  10/100, batch:   514/  792, ite: 41501] train loss: 5.718595, tar: 0.61

[epoch:  10/100, batch:   594/  792, ite: 41541] train loss: 5.714837, tar: 0.612835 
l0: 0.571437, l1: 0.571292, l2: 0.577486, l3: 0.587821, l4: 0.604034, l5: 0.721605, l6: 0.827078

[epoch:  10/100, batch:   596/  792, ite: 41542] train loss: 5.714620, tar: 0.612808 
l0: 0.596121, l1: 0.601125, l2: 0.596645, l3: 0.595203, l4: 0.606754, l5: 0.736596, l6: 0.890376

[epoch:  10/100, batch:   598/  792, ite: 41543] train loss: 5.714519, tar: 0.612798 
l0: 0.427250, l1: 0.433943, l2: 0.432854, l3: 0.440325, l4: 0.497176, l5: 0.604705, l6: 0.676210

[epoch:  10/100, batch:   600/  792, ite: 41544] train loss: 5.713551, tar: 0.612677 
l0: 0.620761, l1: 0.624377, l2: 0.615771, l3: 0.612625, l4: 0.666387, l5: 0.749275, l6: 0.988021

[epoch:  10/100, batch:   602/  792, ite: 41545] train loss: 5.713691, tar: 0.612683 
l0: 0.414003, l1: 0.424986, l2: 0.425840, l3: 0.437832, l4: 0.472749, l5: 0.582984, l6: 0.726083

[epoch:  10/100, batch:   604/  792, ite: 41546] train loss: 5.712700, tar: 0.61

[epoch:  10/100, batch:   684/  792, ite: 41586] train loss: 5.707205, tar: 0.611822 
l0: 0.474135, l1: 0.476371, l2: 0.476558, l3: 0.483779, l4: 0.488123, l5: 0.591195, l6: 0.701492

[epoch:  10/100, batch:   686/  792, ite: 41587] train loss: 5.706410, tar: 0.611736 
l0: 0.510281, l1: 0.520103, l2: 0.517761, l3: 0.533450, l4: 0.587706, l5: 0.712865, l6: 0.897979

[epoch:  10/100, batch:   688/  792, ite: 41588] train loss: 5.706106, tar: 0.611672 
l0: 0.468595, l1: 0.473032, l2: 0.473894, l3: 0.469246, l4: 0.478484, l5: 0.546481, l6: 0.712029

[epoch:  10/100, batch:   690/  792, ite: 41589] train loss: 5.705258, tar: 0.611582 
l0: 0.993710, l1: 1.005630, l2: 1.005505, l3: 1.024882, l4: 1.078263, l5: 1.109101, l6: 1.745399

[epoch:  10/100, batch:   692/  792, ite: 41590] train loss: 5.707775, tar: 0.611822 
l0: 0.901105, l1: 0.904381, l2: 0.907395, l3: 0.917941, l4: 0.953592, l5: 1.113657, l6: 1.165741

[epoch:  10/100, batch:   694/  792, ite: 41591] train loss: 5.709256, tar: 0.61

[epoch:  10/100, batch:   774/  792, ite: 41631] train loss: 5.720723, tar: 0.613493 
l0: 0.589840, l1: 0.595432, l2: 0.598590, l3: 0.620372, l4: 0.646071, l5: 0.785886, l6: 1.019612

[epoch:  10/100, batch:   776/  792, ite: 41632] train loss: 5.720804, tar: 0.613478 
l0: 0.681527, l1: 0.689928, l2: 0.692564, l3: 0.703763, l4: 0.697415, l5: 0.788814, l6: 1.006674

[epoch:  10/100, batch:   778/  792, ite: 41633] train loss: 5.721157, tar: 0.613520 
l0: 0.348689, l1: 0.346246, l2: 0.344244, l3: 0.349447, l4: 0.378176, l5: 0.510622, l6: 0.678543

[epoch:  10/100, batch:   780/  792, ite: 41634] train loss: 5.719898, tar: 0.613358 
l0: 0.509804, l1: 0.513388, l2: 0.517051, l3: 0.531305, l4: 0.594942, l5: 0.704040, l6: 0.764501

[epoch:  10/100, batch:   782/  792, ite: 41635] train loss: 5.719440, tar: 0.613294 
l0: 0.934861, l1: 0.934858, l2: 0.925087, l3: 0.929360, l4: 0.974642, l5: 0.960913, l6: 1.067901

[epoch:  10/100, batch:   784/  792, ite: 41636] train loss: 5.720724, tar: 0.61

l0: 0.728036, l1: 0.726771, l2: 0.732313, l3: 0.729403, l4: 0.751435, l5: 0.845606, l6: 0.888228

[epoch:  11/100, batch:    72/  792, ite: 41676] train loss: 5.725980, tar: 0.613758 
l0: 0.513012, l1: 0.514103, l2: 0.508343, l3: 0.503542, l4: 0.520611, l5: 0.562460, l6: 0.778384

[epoch:  11/100, batch:    74/  792, ite: 41677] train loss: 5.725388, tar: 0.613698 
l0: 0.638355, l1: 0.634067, l2: 0.635384, l3: 0.628451, l4: 0.648702, l5: 0.659004, l6: 0.744926

[epoch:  11/100, batch:    76/  792, ite: 41678] train loss: 5.725213, tar: 0.613713 
l0: 0.548773, l1: 0.533301, l2: 0.525885, l3: 0.532680, l4: 0.579752, l5: 0.617861, l6: 0.651531

[epoch:  11/100, batch:    78/  792, ite: 41679] train loss: 5.724619, tar: 0.613674 
l0: 0.385436, l1: 0.383251, l2: 0.382444, l3: 0.379475, l4: 0.402750, l5: 0.440584, l6: 0.517591

[epoch:  11/100, batch:    80/  792, ite: 41680] train loss: 5.723308, tar: 0.613538 
l0: 0.255067, l1: 0.253626, l2: 0.257330, l3: 0.258989, l4: 0.297285, l5: 0.4430

[epoch:  11/100, batch:   160/  792, ite: 41720] train loss: 5.724828, tar: 0.613671 
l0: 0.514156, l1: 0.517474, l2: 0.516510, l3: 0.526132, l4: 0.541172, l5: 0.583706, l6: 0.775588

[epoch:  11/100, batch:   162/  792, ite: 41721] train loss: 5.724302, tar: 0.613613 
l0: 0.410879, l1: 0.423519, l2: 0.425921, l3: 0.440327, l4: 0.454271, l5: 0.574307, l6: 0.755261

[epoch:  11/100, batch:   164/  792, ite: 41722] train loss: 5.723424, tar: 0.613496 
l0: 1.127289, l1: 1.109786, l2: 1.105574, l3: 1.128743, l4: 1.240786, l5: 1.216655, l6: 1.438099

[epoch:  11/100, batch:   166/  792, ite: 41723] train loss: 5.725843, tar: 0.613794 
l0: 0.529620, l1: 0.534258, l2: 0.534510, l3: 0.540297, l4: 0.559640, l5: 0.635941, l6: 0.778966

[epoch:  11/100, batch:   168/  792, ite: 41724] train loss: 5.725368, tar: 0.613745 
l0: 0.528962, l1: 0.531233, l2: 0.529398, l3: 0.533044, l4: 0.547087, l5: 0.639584, l6: 0.837883

[epoch:  11/100, batch:   170/  792, ite: 41725] train loss: 5.724991, tar: 0.61

[epoch:  11/100, batch:   250/  792, ite: 41765] train loss: 5.716989, tar: 0.612644 
l0: 1.035999, l1: 1.051469, l2: 1.066331, l3: 1.063291, l4: 1.007666, l5: 0.994120, l6: 0.992802

[epoch:  11/100, batch:   252/  792, ite: 41766] train loss: 5.718465, tar: 0.612884 
l0: 1.072179, l1: 1.102980, l2: 1.100111, l3: 1.113973, l4: 1.154183, l5: 1.268136, l6: 1.588235

[epoch:  11/100, batch:   254/  792, ite: 41767] train loss: 5.720990, tar: 0.613144 
l0: 0.570930, l1: 0.572884, l2: 0.570959, l3: 0.580029, l4: 0.618317, l5: 0.677709, l6: 0.862669

[epoch:  11/100, batch:   256/  792, ite: 41768] train loss: 5.720848, tar: 0.613120 
l0: 0.434215, l1: 0.444940, l2: 0.447089, l3: 0.458605, l4: 0.471291, l5: 0.550305, l6: 0.680454

[epoch:  11/100, batch:   258/  792, ite: 41769] train loss: 5.719969, tar: 0.613019 
l0: 0.531739, l1: 0.538712, l2: 0.539022, l3: 0.556567, l4: 0.599692, l5: 0.638832, l6: 0.897751

[epoch:  11/100, batch:   260/  792, ite: 41770] train loss: 5.719778, tar: 0.61

[epoch:  11/100, batch:   340/  792, ite: 41810] train loss: 5.711975, tar: 0.612104 
l0: 1.029255, l1: 1.018341, l2: 1.019321, l3: 1.021489, l4: 1.078159, l5: 1.112193, l6: 1.121719

[epoch:  11/100, batch:   342/  792, ite: 41811] train loss: 5.713546, tar: 0.612334 
l0: 0.379252, l1: 0.380169, l2: 0.383308, l3: 0.393728, l4: 0.439411, l5: 0.594989, l6: 0.767815

[epoch:  11/100, batch:   344/  792, ite: 41812] train loss: 5.712705, tar: 0.612206 
l0: 0.940058, l1: 0.955735, l2: 0.956687, l3: 0.971397, l4: 1.045337, l5: 1.174267, l6: 1.200707

[epoch:  11/100, batch:   346/  792, ite: 41813] train loss: 5.714249, tar: 0.612386 
l0: 0.587226, l1: 0.588635, l2: 0.587772, l3: 0.588994, l4: 0.629570, l5: 0.759048, l6: 0.975126

[epoch:  11/100, batch:   348/  792, ite: 41814] train loss: 5.714230, tar: 0.612373 
l0: 0.628676, l1: 0.634804, l2: 0.631404, l3: 0.630123, l4: 0.647852, l5: 0.687600, l6: 0.766011

[epoch:  11/100, batch:   350/  792, ite: 41815] train loss: 5.714094, tar: 0.61

[epoch:  11/100, batch:   430/  792, ite: 41855] train loss: 5.711136, tar: 0.612138 
l0: 0.229324, l1: 0.232630, l2: 0.230811, l3: 0.239428, l4: 0.291956, l5: 0.407911, l6: 0.454714

[epoch:  11/100, batch:   432/  792, ite: 41856] train loss: 5.709460, tar: 0.611932 
l0: 0.499417, l1: 0.501586, l2: 0.500026, l3: 0.499283, l4: 0.516209, l5: 0.588271, l6: 0.806500

[epoch:  11/100, batch:   434/  792, ite: 41857] train loss: 5.708905, tar: 0.611872 
l0: 0.452241, l1: 0.448983, l2: 0.444296, l3: 0.441435, l4: 0.474605, l5: 0.591597, l6: 0.720930

[epoch:  11/100, batch:   436/  792, ite: 41858] train loss: 5.708190, tar: 0.611786 
l0: 0.455471, l1: 0.457267, l2: 0.451801, l3: 0.459021, l4: 0.464646, l5: 0.517571, l6: 0.615448

[epoch:  11/100, batch:   438/  792, ite: 41859] train loss: 5.707426, tar: 0.611702 
l0: 0.568299, l1: 0.566857, l2: 0.560544, l3: 0.560495, l4: 0.576173, l5: 0.685830, l6: 1.093759

[epoch:  11/100, batch:   440/  792, ite: 41860] train loss: 5.707440, tar: 0.61

[epoch:  11/100, batch:   520/  792, ite: 41900] train loss: 5.701422, tar: 0.610765 
l0: 0.399694, l1: 0.397504, l2: 0.399514, l3: 0.409185, l4: 0.437869, l5: 0.549037, l6: 0.759117

[epoch:  11/100, batch:   522/  792, ite: 41901] train loss: 5.700543, tar: 0.610654 
l0: 0.694411, l1: 0.709760, l2: 0.709381, l3: 0.714521, l4: 0.758863, l5: 0.908382, l6: 1.110390

[epoch:  11/100, batch:   524/  792, ite: 41902] train loss: 5.701080, tar: 0.610698 
l0: 0.714743, l1: 0.717056, l2: 0.716800, l3: 0.722253, l4: 0.763480, l5: 0.831351, l6: 0.894917

[epoch:  11/100, batch:   526/  792, ite: 41903] train loss: 5.701428, tar: 0.610753 
l0: 0.541061, l1: 0.534296, l2: 0.530483, l3: 0.536741, l4: 0.568539, l5: 0.645848, l6: 0.771060

[epoch:  11/100, batch:   528/  792, ite: 41904] train loss: 5.701028, tar: 0.610716 
l0: 0.659158, l1: 0.662347, l2: 0.664272, l3: 0.660771, l4: 0.696734, l5: 0.718246, l6: 1.002231

[epoch:  11/100, batch:   530/  792, ite: 41905] train loss: 5.701258, tar: 0.61

[epoch:  11/100, batch:   610/  792, ite: 41945] train loss: 5.704862, tar: 0.611182 
l0: 0.643382, l1: 0.642485, l2: 0.643502, l3: 0.648380, l4: 0.665322, l5: 0.666938, l6: 0.816618

[epoch:  11/100, batch:   612/  792, ite: 41946] train loss: 5.704830, tar: 0.611199 
l0: 0.429564, l1: 0.427240, l2: 0.429952, l3: 0.437661, l4: 0.465435, l5: 0.523036, l6: 0.668323

[epoch:  11/100, batch:   614/  792, ite: 41947] train loss: 5.704014, tar: 0.611105 
l0: 0.514919, l1: 0.516316, l2: 0.516070, l3: 0.524289, l4: 0.549777, l5: 0.618781, l6: 0.825502

[epoch:  11/100, batch:   616/  792, ite: 41948] train loss: 5.703603, tar: 0.611056 
l0: 0.780745, l1: 0.781963, l2: 0.776428, l3: 0.771938, l4: 0.767839, l5: 0.770021, l6: 0.848276

[epoch:  11/100, batch:   618/  792, ite: 41949] train loss: 5.703966, tar: 0.611143 
l0: 0.280449, l1: 0.283502, l2: 0.288937, l3: 0.326526, l4: 0.388441, l5: 0.543339, l6: 0.746908

[epoch:  11/100, batch:   620/  792, ite: 41950] train loss: 5.702928, tar: 0.61

[epoch:  11/100, batch:   700/  792, ite: 41990] train loss: 5.702174, tar: 0.611126 
l0: 0.573058, l1: 0.574992, l2: 0.576428, l3: 0.583489, l4: 0.634032, l5: 0.728652, l6: 1.000619

[epoch:  11/100, batch:   702/  792, ite: 41991] train loss: 5.702136, tar: 0.611107 
l0: 0.694176, l1: 0.697193, l2: 0.699723, l3: 0.720225, l4: 0.757553, l5: 0.803917, l6: 0.874775

[epoch:  11/100, batch:   704/  792, ite: 41992] train loss: 5.702371, tar: 0.611149 
l0: 0.696636, l1: 0.699474, l2: 0.699034, l3: 0.702696, l4: 0.707998, l5: 0.743642, l6: 1.009493

[epoch:  11/100, batch:   706/  792, ite: 41993] train loss: 5.702711, tar: 0.611192 
l0: 0.668882, l1: 0.657931, l2: 0.658819, l3: 0.662558, l4: 0.685182, l5: 0.779961, l6: 1.225376

[epoch:  11/100, batch:   708/  792, ite: 41994] train loss: 5.703383, tar: 0.611221 
l0: 0.677011, l1: 0.674799, l2: 0.675269, l3: 0.665652, l4: 0.655685, l5: 0.708300, l6: 0.850375

[epoch:  11/100, batch:   710/  792, ite: 41995] train loss: 5.703432, tar: 0.61

[epoch:  11/100, batch:   790/  792, ite: 42035] train loss: 5.633900, tar: 0.603541 
l0: 0.640149, l1: 0.641026, l2: 0.640771, l3: 0.637054, l4: 0.643533, l5: 0.744843, l6: 0.984422

[epoch:  11/100, batch:   792/  792, ite: 42036] train loss: 5.645582, tar: 0.604558 
Starting epoch 12
Epoch 12 loading complete
l0: 0.647081, l1: 0.657405, l2: 0.659056, l3: 0.679791, l4: 0.742944, l5: 0.769479, l6: 1.097906

[epoch:  12/100, batch:     2/  792, ite: 42037] train loss: 5.668025, tar: 0.605707 
l0: 0.584817, l1: 0.579801, l2: 0.572738, l3: 0.562300, l4: 0.604699, l5: 0.769558, l6: 1.039781

[epoch:  12/100, batch:     4/  792, ite: 42038] train loss: 5.670048, tar: 0.605157 
l0: 0.495395, l1: 0.507886, l2: 0.503624, l3: 0.505013, l4: 0.522195, l5: 0.674567, l6: 0.799911

[epoch:  12/100, batch:     6/  792, ite: 42039] train loss: 5.649314, tar: 0.602343 
l0: 0.343571, l1: 0.343134, l2: 0.346216, l3: 0.349530, l4: 0.391180, l5: 0.465202, l6: 0.574016

[epoch:  12/100, batch:     8/  792,

l0: 0.978717, l1: 0.982846, l2: 0.985359, l3: 0.981260, l4: 0.989062, l5: 1.039866, l6: 1.133639

[epoch:  12/100, batch:    88/  792, ite: 42080] train loss: 5.635467, tar: 0.600103 
l0: 0.563279, l1: 0.565426, l2: 0.560014, l3: 0.568040, l4: 0.613038, l5: 0.792054, l6: 1.029169

[epoch:  12/100, batch:    90/  792, ite: 42081] train loss: 5.637467, tar: 0.599648 
l0: 1.178232, l1: 1.174461, l2: 1.179582, l3: 1.197973, l4: 1.228188, l5: 1.249403, l6: 1.438108

[epoch:  12/100, batch:    92/  792, ite: 42082] train loss: 5.693179, tar: 0.606704 
l0: 0.623247, l1: 0.617437, l2: 0.617476, l3: 0.624708, l4: 0.676371, l5: 0.820882, l6: 0.965001

[epoch:  12/100, batch:    94/  792, ite: 42083] train loss: 5.695521, tar: 0.606904 
l0: 0.685702, l1: 0.688577, l2: 0.695296, l3: 0.697729, l4: 0.745725, l5: 0.888783, l6: 0.962166

[epoch:  12/100, batch:    96/  792, ite: 42084] train loss: 5.703098, tar: 0.607842 
l0: 0.413428, l1: 0.430672, l2: 0.437869, l3: 0.444478, l4: 0.478214, l5: 0.5542

[epoch:  12/100, batch:   176/  792, ite: 42124] train loss: 5.608997, tar: 0.597173 
l0: 0.328039, l1: 0.327267, l2: 0.329447, l3: 0.334682, l4: 0.376945, l5: 0.498481, l6: 0.579620

[epoch:  12/100, batch:   178/  792, ite: 42125] train loss: 5.590896, tar: 0.595020 
l0: 0.946262, l1: 0.949808, l2: 0.955826, l3: 0.949576, l4: 0.985568, l5: 1.074692, l6: 1.232321

[epoch:  12/100, batch:   180/  792, ite: 42126] train loss: 5.611820, tar: 0.597807 
l0: 0.902285, l1: 0.911764, l2: 0.915715, l3: 0.918296, l4: 0.955849, l5: 1.008912, l6: 0.957715

[epoch:  12/100, batch:   182/  792, ite: 42127] train loss: 5.627552, tar: 0.600205 
l0: 0.639002, l1: 0.644350, l2: 0.638994, l3: 0.642789, l4: 0.711474, l5: 0.820087, l6: 1.094057

[epoch:  12/100, batch:   184/  792, ite: 42128] train loss: 5.634622, tar: 0.600508 
l0: 0.961161, l1: 0.961963, l2: 0.943353, l3: 0.931223, l4: 0.979757, l5: 0.980484, l6: 1.164074

[epoch:  12/100, batch:   186/  792, ite: 42129] train loss: 5.653432, tar: 0.60

[epoch:  12/100, batch:   266/  792, ite: 42169] train loss: 5.581998, tar: 0.601052 
l0: 0.603021, l1: 0.605139, l2: 0.608746, l3: 0.617402, l4: 0.646234, l5: 0.750004, l6: 0.907398

[epoch:  12/100, batch:   268/  792, ite: 42170] train loss: 5.582798, tar: 0.601064 
l0: 0.720233, l1: 0.726530, l2: 0.728755, l3: 0.716208, l4: 0.739950, l5: 0.792323, l6: 1.396170

[epoch:  12/100, batch:   270/  792, ite: 42171] train loss: 5.592282, tar: 0.601761 
l0: 0.437794, l1: 0.442938, l2: 0.439942, l3: 0.435385, l4: 0.464253, l5: 0.543269, l6: 0.665958

[epoch:  12/100, batch:   272/  792, ite: 42172] train loss: 5.583482, tar: 0.600808 
l0: 0.544376, l1: 0.545246, l2: 0.535232, l3: 0.539600, l4: 0.554480, l5: 0.645891, l6: 0.719710

[epoch:  12/100, batch:   274/  792, ite: 42173] train loss: 5.579685, tar: 0.600481 
l0: 0.709647, l1: 0.712429, l2: 0.715020, l3: 0.711297, l4: 0.761710, l5: 0.914035, l6: 0.984911

[epoch:  12/100, batch:   276/  792, ite: 42174] train loss: 5.585479, tar: 0.60

[epoch:  12/100, batch:   356/  792, ite: 42214] train loss: 5.551826, tar: 0.595783 
l0: 0.701673, l1: 0.707467, l2: 0.705315, l3: 0.714828, l4: 0.725516, l5: 0.793461, l6: 1.056319

[epoch:  12/100, batch:   358/  792, ite: 42215] train loss: 5.556817, tar: 0.596275 
l0: 0.573193, l1: 0.573601, l2: 0.572725, l3: 0.583066, l4: 0.618534, l5: 0.612844, l6: 0.789863

[epoch:  12/100, batch:   360/  792, ite: 42216] train loss: 5.555232, tar: 0.596168 
l0: 0.432041, l1: 0.436821, l2: 0.434834, l3: 0.431909, l4: 0.445902, l5: 0.517060, l6: 0.633296

[epoch:  12/100, batch:   362/  792, ite: 42217] train loss: 5.548433, tar: 0.595412 
l0: 0.911185, l1: 0.919573, l2: 0.921806, l3: 0.922660, l4: 0.926232, l5: 0.947223, l6: 0.976008

[epoch:  12/100, batch:   364/  792, ite: 42218] train loss: 5.557645, tar: 0.596860 
l0: 0.582453, l1: 0.584030, l2: 0.580270, l3: 0.571724, l4: 0.595521, l5: 0.606937, l6: 0.730388

[epoch:  12/100, batch:   366/  792, ite: 42219] train loss: 5.555254, tar: 0.59

[epoch:  12/100, batch:   446/  792, ite: 42259] train loss: 5.559662, tar: 0.596511 
l0: 1.133578, l1: 1.131655, l2: 1.124712, l3: 1.121428, l4: 1.116382, l5: 1.161571, l6: 1.178486

[epoch:  12/100, batch:   448/  792, ite: 42260] train loss: 5.574111, tar: 0.598576 
l0: 0.446786, l1: 0.448504, l2: 0.451541, l3: 0.448089, l4: 0.484483, l5: 0.492612, l6: 0.662972

[epoch:  12/100, batch:   450/  792, ite: 42261] train loss: 5.568865, tar: 0.597995 
l0: 0.784631, l1: 0.778915, l2: 0.783827, l3: 0.787490, l4: 0.828984, l5: 0.890087, l6: 1.076843

[epoch:  12/100, batch:   452/  792, ite: 42262] train loss: 5.574585, tar: 0.598707 
l0: 0.605686, l1: 0.612071, l2: 0.610943, l3: 0.604196, l4: 0.634907, l5: 0.722880, l6: 0.896090

[epoch:  12/100, batch:   454/  792, ite: 42263] train loss: 5.574556, tar: 0.598734 
l0: 0.583906, l1: 0.584991, l2: 0.583313, l3: 0.582405, l4: 0.584647, l5: 0.686815, l6: 0.761964

[epoch:  12/100, batch:   456/  792, ite: 42264] train loss: 5.573063, tar: 0.59

[epoch:  12/100, batch:   536/  792, ite: 42304] train loss: 5.645550, tar: 0.607233 
l0: 0.469562, l1: 0.467491, l2: 0.467312, l3: 0.473173, l4: 0.517754, l5: 0.601087, l6: 0.747087

[epoch:  12/100, batch:   538/  792, ite: 42305] train loss: 5.642359, tar: 0.606781 
l0: 0.279819, l1: 0.283994, l2: 0.280032, l3: 0.279104, l4: 0.316165, l5: 0.472445, l6: 0.557150

[epoch:  12/100, batch:   540/  792, ite: 42306] train loss: 5.633881, tar: 0.605713 
l0: 0.785524, l1: 0.791790, l2: 0.796161, l3: 0.805475, l4: 0.822036, l5: 0.782591, l6: 0.874012

[epoch:  12/100, batch:   542/  792, ite: 42307] train loss: 5.636919, tar: 0.606299 
l0: 0.773652, l1: 0.768232, l2: 0.763080, l3: 0.761074, l4: 0.777663, l5: 0.847086, l6: 0.895324

[epoch:  12/100, batch:   544/  792, ite: 42308] train loss: 5.640576, tar: 0.606842 
l0: 0.643764, l1: 0.642886, l2: 0.641389, l3: 0.637759, l4: 0.645437, l5: 0.660484, l6: 0.774377

[epoch:  12/100, batch:   546/  792, ite: 42309] train loss: 5.640006, tar: 0.60

[epoch:  12/100, batch:   626/  792, ite: 42349] train loss: 5.645090, tar: 0.606546 
l0: 0.372674, l1: 0.375995, l2: 0.374878, l3: 0.377472, l4: 0.427541, l5: 0.503325, l6: 0.632501

[epoch:  12/100, batch:   628/  792, ite: 42350] train loss: 5.639802, tar: 0.605878 
l0: 0.505605, l1: 0.505363, l2: 0.503298, l3: 0.505153, l4: 0.530264, l5: 0.629386, l6: 0.727154

[epoch:  12/100, batch:   630/  792, ite: 42351] train loss: 5.637061, tar: 0.605592 
l0: 0.601901, l1: 0.610026, l2: 0.610892, l3: 0.605205, l4: 0.615314, l5: 0.699218, l6: 0.839789

[epoch:  12/100, batch:   632/  792, ite: 42352] train loss: 5.636384, tar: 0.605582 
l0: 0.418486, l1: 0.421740, l2: 0.422505, l3: 0.427433, l4: 0.463087, l5: 0.499673, l6: 0.682310

[epoch:  12/100, batch:   634/  792, ite: 42353] train loss: 5.631923, tar: 0.605052 
l0: 0.958361, l1: 0.953492, l2: 0.954163, l3: 0.950656, l4: 0.930432, l5: 1.139901, l6: 1.364010

[epoch:  12/100, batch:   636/  792, ite: 42354] train loss: 5.641197, tar: 0.60

[epoch:  12/100, batch:   716/  792, ite: 42394] train loss: 5.604540, tar: 0.600242 
l0: 0.375504, l1: 0.378613, l2: 0.379247, l3: 0.380227, l4: 0.390141, l5: 0.500861, l6: 0.659508

[epoch:  12/100, batch:   718/  792, ite: 42395] train loss: 5.600138, tar: 0.599673 
l0: 0.581420, l1: 0.576085, l2: 0.580856, l3: 0.592901, l4: 0.653132, l5: 0.809847, l6: 0.993624

[epoch:  12/100, batch:   720/  792, ite: 42396] train loss: 5.600421, tar: 0.599627 
l0: 0.835433, l1: 0.839304, l2: 0.837518, l3: 0.846088, l4: 0.879480, l5: 0.938861, l6: 1.094876

[epoch:  12/100, batch:   722/  792, ite: 42397] train loss: 5.605287, tar: 0.600221 
l0: 0.736238, l1: 0.741646, l2: 0.740074, l3: 0.747970, l4: 0.782528, l5: 0.950528, l6: 1.564472

[epoch:  12/100, batch:   724/  792, ite: 42398] train loss: 5.610839, tar: 0.600562 
l0: 0.596381, l1: 0.592101, l2: 0.591011, l3: 0.601411, l4: 0.623200, l5: 0.749917, l6: 0.925800

[epoch:  12/100, batch:   726/  792, ite: 42399] train loss: 5.611032, tar: 0.60

l0: 0.537278, l1: 0.549257, l2: 0.557310, l3: 0.582035, l4: 0.627413, l5: 0.723345, l6: 0.925470

[epoch:  13/100, batch:    14/  792, ite: 42439] train loss: 5.584938, tar: 0.597347 
l0: 0.550597, l1: 0.549241, l2: 0.548131, l3: 0.547287, l4: 0.566891, l5: 0.690017, l6: 0.905350

[epoch:  13/100, batch:    16/  792, ite: 42440] train loss: 5.584127, tar: 0.597241 
l0: 0.941791, l1: 0.940705, l2: 0.941566, l3: 0.944947, l4: 0.973487, l5: 1.024976, l6: 1.281833

[epoch:  13/100, batch:    18/  792, ite: 42441] train loss: 5.590670, tar: 0.598022 
l0: 0.956445, l1: 0.952137, l2: 0.949925, l3: 0.962998, l4: 1.018680, l5: 1.142486, l6: 1.253660

[epoch:  13/100, batch:    20/  792, ite: 42442] train loss: 5.597356, tar: 0.598833 
l0: 0.503124, l1: 0.511726, l2: 0.511967, l3: 0.522412, l4: 0.546131, l5: 0.557238, l6: 0.638817

[epoch:  13/100, batch:    22/  792, ite: 42443] train loss: 5.595104, tar: 0.598617 
l0: 0.667631, l1: 0.675967, l2: 0.678853, l3: 0.681581, l4: 0.701136, l5: 0.8187

[epoch:  13/100, batch:   102/  792, ite: 42483] train loss: 5.558861, tar: 0.592871 
l0: 0.479368, l1: 0.479286, l2: 0.481409, l3: 0.493335, l4: 0.520920, l5: 0.568550, l6: 0.820228

[epoch:  13/100, batch:   104/  792, ite: 42484] train loss: 5.557012, tar: 0.592637 
l0: 0.529620, l1: 0.542735, l2: 0.548321, l3: 0.570693, l4: 0.602674, l5: 0.699580, l6: 0.853073

[epoch:  13/100, batch:   106/  792, ite: 42485] train loss: 5.556334, tar: 0.592507 
l0: 0.432775, l1: 0.437451, l2: 0.438874, l3: 0.447305, l4: 0.480238, l5: 0.620513, l6: 0.785709

[epoch:  13/100, batch:   108/  792, ite: 42486] train loss: 5.554124, tar: 0.592178 
l0: 0.562633, l1: 0.565490, l2: 0.563189, l3: 0.585115, l4: 0.598385, l5: 0.674282, l6: 0.748064

[epoch:  13/100, batch:   110/  792, ite: 42487] train loss: 5.553419, tar: 0.592117 
l0: 0.247108, l1: 0.256006, l2: 0.257914, l3: 0.263009, l4: 0.282733, l5: 0.371593, l6: 0.522416

[epoch:  13/100, batch:   112/  792, ite: 42488] train loss: 5.547674, tar: 0.59

[epoch:  13/100, batch:   192/  792, ite: 42528] train loss: 5.602104, tar: 0.597515 
l0: 0.745771, l1: 0.750426, l2: 0.755965, l3: 0.754695, l4: 0.774313, l5: 0.827445, l6: 1.054256

[epoch:  13/100, batch:   194/  792, ite: 42529] train loss: 5.604504, tar: 0.597795 
l0: 0.429043, l1: 0.435237, l2: 0.433977, l3: 0.434887, l4: 0.466029, l5: 0.530960, l6: 0.665207

[epoch:  13/100, batch:   196/  792, ite: 42530] train loss: 5.601725, tar: 0.597477 
l0: 0.388734, l1: 0.391618, l2: 0.392014, l3: 0.405226, l4: 0.459021, l5: 0.640060, l6: 0.940049

[epoch:  13/100, batch:   198/  792, ite: 42531] train loss: 5.599580, tar: 0.597084 
l0: 0.305201, l1: 0.310878, l2: 0.312613, l3: 0.324018, l4: 0.342742, l5: 0.515638, l6: 0.749328

[epoch:  13/100, batch:   200/  792, ite: 42532] train loss: 5.595910, tar: 0.596535 
l0: 0.468253, l1: 0.470472, l2: 0.472235, l3: 0.474598, l4: 0.480492, l5: 0.561587, l6: 0.656461

[epoch:  13/100, batch:   202/  792, ite: 42533] train loss: 5.593576, tar: 0.59

[epoch:  13/100, batch:   282/  792, ite: 42573] train loss: 5.569822, tar: 0.593663 
l0: 0.373137, l1: 0.379286, l2: 0.374240, l3: 0.374520, l4: 0.404853, l5: 0.544532, l6: 0.638020

[epoch:  13/100, batch:   284/  792, ite: 42574] train loss: 5.566742, tar: 0.593278 
l0: 1.120781, l1: 1.123966, l2: 1.124559, l3: 1.141311, l4: 1.141116, l5: 1.138952, l6: 1.714996

[epoch:  13/100, batch:   286/  792, ite: 42575] train loss: 5.574916, tar: 0.594196 
l0: 0.963041, l1: 0.971325, l2: 0.967046, l3: 0.975224, l4: 1.017318, l5: 1.005827, l6: 1.067626

[epoch:  13/100, batch:   288/  792, ite: 42576] train loss: 5.579201, tar: 0.594836 
l0: 0.327042, l1: 0.338718, l2: 0.340345, l3: 0.352980, l4: 0.348203, l5: 0.462007, l6: 0.579792

[epoch:  13/100, batch:   290/  792, ite: 42577] train loss: 5.575339, tar: 0.594372 
l0: 0.706632, l1: 0.705888, l2: 0.704668, l3: 0.713749, l4: 0.737786, l5: 0.812554, l6: 1.525801

[epoch:  13/100, batch:   292/  792, ite: 42578] train loss: 5.578703, tar: 0.59

[epoch:  13/100, batch:   372/  792, ite: 42618] train loss: 5.597060, tar: 0.597255 
l0: 0.656024, l1: 0.661697, l2: 0.662964, l3: 0.681361, l4: 0.724321, l5: 0.913487, l6: 1.005460

[epoch:  13/100, batch:   374/  792, ite: 42619] train loss: 5.598423, tar: 0.597350 
l0: 0.421271, l1: 0.425260, l2: 0.425151, l3: 0.431668, l4: 0.474558, l5: 0.596724, l6: 0.755005

[epoch:  13/100, batch:   376/  792, ite: 42620] train loss: 5.596360, tar: 0.597066 
l0: 1.073268, l1: 1.084314, l2: 1.087106, l3: 1.120521, l4: 1.177630, l5: 1.227086, l6: 1.208742

[epoch:  13/100, batch:   378/  792, ite: 42621] train loss: 5.602150, tar: 0.597833 
l0: 0.634011, l1: 0.635694, l2: 0.635561, l3: 0.641255, l4: 0.672378, l5: 0.776596, l6: 0.988786

[epoch:  13/100, batch:   380/  792, ite: 42622] train loss: 5.602757, tar: 0.597891 
l0: 0.475393, l1: 0.474948, l2: 0.481176, l3: 0.496766, l4: 0.523682, l5: 0.696103, l6: 1.188111

[epoch:  13/100, batch:   382/  792, ite: 42623] train loss: 5.602655, tar: 0.59

[epoch:  13/100, batch:   462/  792, ite: 42663] train loss: 5.596367, tar: 0.596635 
l0: 0.809613, l1: 0.810389, l2: 0.814926, l3: 0.809706, l4: 0.820950, l5: 0.905353, l6: 0.999821

[epoch:  13/100, batch:   464/  792, ite: 42664] train loss: 5.598479, tar: 0.596955 
l0: 0.620192, l1: 0.632429, l2: 0.626395, l3: 0.606428, l4: 0.613973, l5: 0.695672, l6: 0.774133

[epoch:  13/100, batch:   466/  792, ite: 42665] train loss: 5.598043, tar: 0.596990 
l0: 0.581108, l1: 0.575794, l2: 0.575878, l3: 0.584007, l4: 0.614812, l5: 0.653444, l6: 0.705950

[epoch:  13/100, batch:   468/  792, ite: 42666] train loss: 5.597389, tar: 0.596966 
l0: 0.335722, l1: 0.351283, l2: 0.354237, l3: 0.359776, l4: 0.403961, l5: 0.526199, l6: 0.574080

[epoch:  13/100, batch:   470/  792, ite: 42667] train loss: 5.594292, tar: 0.596575 
l0: 0.417379, l1: 0.422708, l2: 0.420215, l3: 0.422538, l4: 0.446301, l5: 0.564650, l6: 0.685669

[epoch:  13/100, batch:   472/  792, ite: 42668] train loss: 5.591973, tar: 0.59

[epoch:  13/100, batch:   552/  792, ite: 42708] train loss: 5.620475, tar: 0.599707 
l0: 0.640426, l1: 0.650285, l2: 0.657590, l3: 0.668609, l4: 0.681537, l5: 0.692723, l6: 0.870846

[epoch:  13/100, batch:   554/  792, ite: 42709] train loss: 5.620887, tar: 0.599764 
l0: 0.200791, l1: 0.204409, l2: 0.202107, l3: 0.209341, l4: 0.245230, l5: 0.310120, l6: 0.352088

[epoch:  13/100, batch:   556/  792, ite: 42710] train loss: 5.615917, tar: 0.599202 
l0: 1.382370, l1: 1.403836, l2: 1.400118, l3: 1.417381, l4: 1.449639, l5: 1.503332, l6: 1.523662

[epoch:  13/100, batch:   558/  792, ite: 42711] train loss: 5.624615, tar: 0.600304 
l0: 0.576310, l1: 0.578719, l2: 0.581239, l3: 0.586449, l4: 0.633036, l5: 0.684985, l6: 0.877100

[epoch:  13/100, batch:   560/  792, ite: 42712] train loss: 5.624549, tar: 0.600270 
l0: 0.570891, l1: 0.579125, l2: 0.584808, l3: 0.581755, l4: 0.586063, l5: 0.643418, l6: 0.962248

[epoch:  13/100, batch:   562/  792, ite: 42713] train loss: 5.624433, tar: 0.60

[epoch:  13/100, batch:   642/  792, ite: 42753] train loss: 5.642463, tar: 0.602603 
l0: 0.834985, l1: 0.839204, l2: 0.843233, l3: 0.835808, l4: 0.846182, l5: 0.960868, l6: 1.140003

[epoch:  13/100, batch:   644/  792, ite: 42754] train loss: 5.644941, tar: 0.602911 
l0: 0.455757, l1: 0.456651, l2: 0.458490, l3: 0.468816, l4: 0.471974, l5: 0.551078, l6: 0.669388

[epoch:  13/100, batch:   646/  792, ite: 42755] train loss: 5.643147, tar: 0.602716 
l0: 1.049539, l1: 1.066146, l2: 1.066898, l3: 1.059784, l4: 1.122001, l5: 1.219087, l6: 1.370559

[epoch:  13/100, batch:   648/  792, ite: 42756] train loss: 5.648109, tar: 0.603307 
l0: 0.471187, l1: 0.467512, l2: 0.465706, l3: 0.480912, l4: 0.505653, l5: 0.634009, l6: 0.761125

[epoch:  13/100, batch:   650/  792, ite: 42757] train loss: 5.646773, tar: 0.603133 
l0: 0.568547, l1: 0.573925, l2: 0.576848, l3: 0.584310, l4: 0.613198, l5: 0.744317, l6: 0.883486

[epoch:  13/100, batch:   652/  792, ite: 42758] train loss: 5.646515, tar: 0.60

[epoch:  13/100, batch:   732/  792, ite: 42798] train loss: 5.618607, tar: 0.600206 
l0: 0.345876, l1: 0.350363, l2: 0.347659, l3: 0.346656, l4: 0.371689, l5: 0.402552, l6: 0.606397

[epoch:  13/100, batch:   734/  792, ite: 42799] train loss: 5.615885, tar: 0.599888 
l0: 0.603828, l1: 0.605843, l2: 0.609734, l3: 0.618467, l4: 0.645316, l5: 0.809573, l6: 1.179397

[epoch:  13/100, batch:   736/  792, ite: 42800] train loss: 5.616785, tar: 0.599893 
l0: 1.081844, l1: 1.090101, l2: 1.073163, l3: 1.074188, l4: 1.075842, l5: 1.062557, l6: 1.192678

[epoch:  13/100, batch:   738/  792, ite: 42801] train loss: 5.620906, tar: 0.600494 
l0: 0.360725, l1: 0.360106, l2: 0.361035, l3: 0.375471, l4: 0.415382, l5: 0.542386, l6: 0.706321

[epoch:  13/100, batch:   740/  792, ite: 42802] train loss: 5.618714, tar: 0.600195 
l0: 0.659629, l1: 0.653249, l2: 0.655355, l3: 0.653372, l4: 0.673433, l5: 0.769715, l6: 0.938510

[epoch:  13/100, batch:   742/  792, ite: 42803] train loss: 5.619208, tar: 0.60

l0: 0.573044, l1: 0.584413, l2: 0.592798, l3: 0.602840, l4: 0.615367, l5: 0.800839, l6: 0.950475

[epoch:  14/100, batch:    30/  792, ite: 42843] train loss: 5.604767, tar: 0.598919 
l0: 0.478420, l1: 0.482107, l2: 0.484010, l3: 0.484088, l4: 0.516003, l5: 0.593510, l6: 0.776657

[epoch:  14/100, batch:    32/  792, ite: 42844] train loss: 5.603618, tar: 0.598776 
l0: 0.375284, l1: 0.379384, l2: 0.378575, l3: 0.393479, l4: 0.459190, l5: 0.510569, l6: 0.655695

[epoch:  14/100, batch:    34/  792, ite: 42845] train loss: 5.601590, tar: 0.598512 
l0: 0.366704, l1: 0.372551, l2: 0.374514, l3: 0.382850, l4: 0.419215, l5: 0.492811, l6: 0.648067

[epoch:  14/100, batch:    36/  792, ite: 42846] train loss: 5.599476, tar: 0.598238 
l0: 0.401992, l1: 0.406341, l2: 0.410389, l3: 0.426679, l4: 0.457476, l5: 0.617098, l6: 0.655002

[epoch:  14/100, batch:    38/  792, ite: 42847] train loss: 5.597756, tar: 0.598006 
l0: 0.682249, l1: 0.683210, l2: 0.681980, l3: 0.684807, l4: 0.679812, l5: 0.7232

[epoch:  14/100, batch:   118/  792, ite: 42887] train loss: 5.578857, tar: 0.595925 
l0: 0.435878, l1: 0.444066, l2: 0.442440, l3: 0.454386, l4: 0.494841, l5: 0.591011, l6: 0.751481

[epoch:  14/100, batch:   120/  792, ite: 42888] train loss: 5.577625, tar: 0.595745 
l0: 0.382011, l1: 0.385098, l2: 0.385593, l3: 0.382792, l4: 0.380078, l5: 0.505121, l6: 0.721567

[epoch:  14/100, batch:   122/  792, ite: 42889] train loss: 5.575795, tar: 0.595504 
l0: 0.512916, l1: 0.515161, l2: 0.522254, l3: 0.530459, l4: 0.571625, l5: 0.666305, l6: 0.798200

[epoch:  14/100, batch:   124/  792, ite: 42890] train loss: 5.575311, tar: 0.595412 
l0: 0.243303, l1: 0.251088, l2: 0.253779, l3: 0.271104, l4: 0.300546, l5: 0.395281, l6: 0.583659

[epoch:  14/100, batch:   126/  792, ite: 42891] train loss: 5.572393, tar: 0.595017 
l0: 1.236695, l1: 1.249475, l2: 1.247421, l3: 1.251672, l4: 1.263961, l5: 1.314789, l6: 1.733059

[epoch:  14/100, batch:   128/  792, ite: 42892] train loss: 5.578658, tar: 0.59

[epoch:  14/100, batch:   208/  792, ite: 42932] train loss: 5.586165, tar: 0.596199 
l0: 0.694022, l1: 0.698813, l2: 0.697153, l3: 0.704191, l4: 0.752914, l5: 0.882993, l6: 1.267920

[epoch:  14/100, batch:   210/  792, ite: 42933] train loss: 5.587968, tar: 0.596304 
l0: 0.378196, l1: 0.385126, l2: 0.379346, l3: 0.376011, l4: 0.389839, l5: 0.484753, l6: 0.731881

[epoch:  14/100, batch:   212/  792, ite: 42934] train loss: 5.586144, tar: 0.596071 
l0: 0.479529, l1: 0.480809, l2: 0.478020, l3: 0.484238, l4: 0.521271, l5: 0.566940, l6: 0.684062

[epoch:  14/100, batch:   214/  792, ite: 42935] train loss: 5.584956, tar: 0.595946 
l0: 0.619722, l1: 0.615723, l2: 0.615363, l3: 0.618945, l4: 0.640040, l5: 0.786843, l6: 0.902241

[epoch:  14/100, batch:   216/  792, ite: 42936] train loss: 5.585160, tar: 0.595972 
l0: 1.016425, l1: 1.019938, l2: 1.018772, l3: 1.021049, l4: 1.035492, l5: 1.119361, l6: 1.441158

[epoch:  14/100, batch:   218/  792, ite: 42937] train loss: 5.589022, tar: 0.59

[epoch:  14/100, batch:   298/  792, ite: 42977] train loss: 5.571241, tar: 0.593996 
l0: 0.359169, l1: 0.359450, l2: 0.361602, l3: 0.367735, l4: 0.369610, l5: 0.462096, l6: 0.571127

[epoch:  14/100, batch:   300/  792, ite: 42978] train loss: 5.569096, tar: 0.593756 
l0: 0.686489, l1: 0.693377, l2: 0.680109, l3: 0.666355, l4: 0.667218, l5: 0.729250, l6: 0.750000

[epoch:  14/100, batch:   302/  792, ite: 42979] train loss: 5.569112, tar: 0.593851 
l0: 0.416700, l1: 0.420471, l2: 0.419560, l3: 0.430472, l4: 0.465178, l5: 0.530042, l6: 0.704674

[epoch:  14/100, batch:   304/  792, ite: 42980] train loss: 5.567599, tar: 0.593670 
l0: 0.537437, l1: 0.534883, l2: 0.534642, l3: 0.539883, l4: 0.566924, l5: 0.632722, l6: 0.850519

[epoch:  14/100, batch:   306/  792, ite: 42981] train loss: 5.567241, tar: 0.593613 
l0: 0.835812, l1: 0.842216, l2: 0.831439, l3: 0.822144, l4: 0.896280, l5: 1.081334, l6: 1.413816

[epoch:  14/100, batch:   308/  792, ite: 42982] train loss: 5.570009, tar: 0.59

[epoch:  14/100, batch:   388/  792, ite: 43022] train loss: 5.560303, tar: 0.593149 
l0: 0.961789, l1: 0.976345, l2: 0.974021, l3: 0.976818, l4: 1.025247, l5: 1.138964, l6: 1.713423

[epoch:  14/100, batch:   390/  792, ite: 43023] train loss: 5.564397, tar: 0.593509 
l0: 0.784087, l1: 0.787923, l2: 0.792793, l3: 0.790167, l4: 0.774372, l5: 0.806887, l6: 0.904548

[epoch:  14/100, batch:   392/  792, ite: 43024] train loss: 5.565511, tar: 0.593695 
l0: 0.410587, l1: 0.418982, l2: 0.421723, l3: 0.427471, l4: 0.463762, l5: 0.590010, l6: 0.872632

[epoch:  14/100, batch:   394/  792, ite: 43025] train loss: 5.564493, tar: 0.593516 
l0: 0.288773, l1: 0.298277, l2: 0.302419, l3: 0.314786, l4: 0.327970, l5: 0.488509, l6: 0.625999

[epoch:  14/100, batch:   396/  792, ite: 43026] train loss: 5.562239, tar: 0.593219 
l0: 0.471882, l1: 0.473368, l2: 0.474315, l3: 0.491260, l4: 0.506020, l5: 0.609360, l6: 0.686476

[epoch:  14/100, batch:   398/  792, ite: 43027] train loss: 5.561097, tar: 0.59

[epoch:  14/100, batch:   478/  792, ite: 43067] train loss: 5.561466, tar: 0.593017 
l0: 0.757432, l1: 0.760072, l2: 0.761612, l3: 0.761183, l4: 0.794876, l5: 0.863746, l6: 0.915675

[epoch:  14/100, batch:   480/  792, ite: 43068] train loss: 5.562456, tar: 0.593171 
l0: 0.450058, l1: 0.451426, l2: 0.456269, l3: 0.462227, l4: 0.466091, l5: 0.489475, l6: 0.710240

[epoch:  14/100, batch:   482/  792, ite: 43069] train loss: 5.561311, tar: 0.593037 
l0: 0.341327, l1: 0.353018, l2: 0.360703, l3: 0.371237, l4: 0.429299, l5: 0.631305, l6: 0.672565

[epoch:  14/100, batch:   484/  792, ite: 43070] train loss: 5.559736, tar: 0.592802 
l0: 0.357153, l1: 0.356564, l2: 0.357436, l3: 0.357149, l4: 0.375846, l5: 0.464308, l6: 0.593535

[epoch:  14/100, batch:   486/  792, ite: 43071] train loss: 5.557783, tar: 0.592582 
l0: 0.524123, l1: 0.518112, l2: 0.514427, l3: 0.506348, l4: 0.569617, l5: 0.631091, l6: 0.786061

[epoch:  14/100, batch:   488/  792, ite: 43072] train loss: 5.557182, tar: 0.59

[epoch:  14/100, batch:   568/  792, ite: 43112] train loss: 5.581260, tar: 0.595114 
l0: 0.708762, l1: 0.712822, l2: 0.713401, l3: 0.723936, l4: 0.763978, l5: 0.783446, l6: 0.889315

[epoch:  14/100, batch:   570/  792, ite: 43113] train loss: 5.581849, tar: 0.595216 
l0: 0.823884, l1: 0.833518, l2: 0.831450, l3: 0.833427, l4: 0.882029, l5: 0.927178, l6: 1.053633

[epoch:  14/100, batch:   572/  792, ite: 43114] train loss: 5.583354, tar: 0.595421 
l0: 0.419906, l1: 0.420094, l2: 0.421743, l3: 0.421605, l4: 0.519839, l5: 0.627361, l6: 0.773069

[epoch:  14/100, batch:   574/  792, ite: 43115] train loss: 5.582306, tar: 0.595264 
l0: 0.712397, l1: 0.712549, l2: 0.720947, l3: 0.737198, l4: 0.738559, l5: 0.811287, l6: 1.432707

[epoch:  14/100, batch:   576/  792, ite: 43116] train loss: 5.584015, tar: 0.595369 
l0: 0.478425, l1: 0.475542, l2: 0.473276, l3: 0.468532, l4: 0.465083, l5: 0.569281, l6: 0.718071

[epoch:  14/100, batch:   578/  792, ite: 43117] train loss: 5.583045, tar: 0.59

[epoch:  14/100, batch:   658/  792, ite: 43157] train loss: 5.579020, tar: 0.594645 
l0: 0.257947, l1: 0.261692, l2: 0.260933, l3: 0.267913, l4: 0.297841, l5: 0.435631, l6: 0.553080

[epoch:  14/100, batch:   660/  792, ite: 43158] train loss: 5.576754, tar: 0.594354 
l0: 0.574608, l1: 0.585430, l2: 0.587863, l3: 0.597962, l4: 0.642520, l5: 0.814788, l6: 0.947636

[epoch:  14/100, batch:   662/  792, ite: 43159] train loss: 5.576946, tar: 0.594337 
l0: 0.586900, l1: 0.596548, l2: 0.605803, l3: 0.618674, l4: 0.647744, l5: 0.767745, l6: 0.999079

[epoch:  14/100, batch:   664/  792, ite: 43160] train loss: 5.577214, tar: 0.594330 
l0: 0.770265, l1: 0.775235, l2: 0.777799, l3: 0.784297, l4: 0.806960, l5: 0.910066, l6: 1.059093

[epoch:  14/100, batch:   666/  792, ite: 43161] train loss: 5.578390, tar: 0.594482 
l0: 0.777608, l1: 0.779497, l2: 0.774586, l3: 0.772663, l4: 0.784181, l5: 0.895742, l6: 0.958117

[epoch:  14/100, batch:   668/  792, ite: 43162] train loss: 5.579320, tar: 0.59

[epoch:  14/100, batch:   748/  792, ite: 43202] train loss: 5.581738, tar: 0.595072 
l0: 0.649154, l1: 0.646549, l2: 0.649915, l3: 0.653073, l4: 0.669489, l5: 0.783305, l6: 1.015856

[epoch:  14/100, batch:   750/  792, ite: 43203] train loss: 5.582250, tar: 0.595117 
l0: 0.598685, l1: 0.617500, l2: 0.620170, l3: 0.636476, l4: 0.672073, l5: 0.837015, l6: 0.930283

[epoch:  14/100, batch:   752/  792, ite: 43204] train loss: 5.582538, tar: 0.595120 
l0: 0.656688, l1: 0.649395, l2: 0.646373, l3: 0.646927, l4: 0.640440, l5: 0.704355, l6: 0.824672

[epoch:  14/100, batch:   754/  792, ite: 43205] train loss: 5.582688, tar: 0.595171 
l0: 0.546564, l1: 0.554397, l2: 0.554435, l3: 0.560723, l4: 0.595175, l5: 0.668943, l6: 1.069253

[epoch:  14/100, batch:   756/  792, ite: 43206] train loss: 5.582870, tar: 0.595130 
l0: 0.300068, l1: 0.300281, l2: 0.302247, l3: 0.300568, l4: 0.329548, l5: 0.476510, l6: 0.546243

[epoch:  14/100, batch:   758/  792, ite: 43207] train loss: 5.580876, tar: 0.59

l0: 0.223921, l1: 0.223682, l2: 0.224728, l3: 0.230817, l4: 0.276209, l5: 0.366459, l6: 0.492065

[epoch:  15/100, batch:    46/  792, ite: 43247] train loss: 5.574073, tar: 0.594049 
l0: 0.538570, l1: 0.537941, l2: 0.539611, l3: 0.542902, l4: 0.535240, l5: 0.635408, l6: 0.875218

[epoch:  15/100, batch:    48/  792, ite: 43248] train loss: 5.573869, tar: 0.594005 
l0: 0.391554, l1: 0.393556, l2: 0.393141, l3: 0.391094, l4: 0.386330, l5: 0.467000, l6: 0.586169

[epoch:  15/100, batch:    50/  792, ite: 43249] train loss: 5.572298, tar: 0.593842 
l0: 1.111012, l1: 1.114234, l2: 1.116618, l3: 1.110538, l4: 1.095114, l5: 1.164457, l6: 1.122733

[epoch:  15/100, batch:    52/  792, ite: 43250] train loss: 5.575036, tar: 0.594256 
l0: 1.304469, l1: 1.297336, l2: 1.287788, l3: 1.267870, l4: 1.326363, l5: 1.385022, l6: 1.521197

[epoch:  15/100, batch:    54/  792, ite: 43251] train loss: 5.579273, tar: 0.594824 
l0: 0.404356, l1: 0.405627, l2: 0.407299, l3: 0.418576, l4: 0.430624, l5: 0.5201

[epoch:  15/100, batch:   134/  792, ite: 43291] train loss: 5.572949, tar: 0.593939 
l0: 0.591573, l1: 0.595818, l2: 0.594638, l3: 0.604661, l4: 0.666150, l5: 0.742110, l6: 0.855049

[epoch:  15/100, batch:   136/  792, ite: 43292] train loss: 5.572934, tar: 0.593938 
l0: 0.583658, l1: 0.587933, l2: 0.582061, l3: 0.596073, l4: 0.636325, l5: 0.706770, l6: 0.867053

[epoch:  15/100, batch:   138/  792, ite: 43293] train loss: 5.572892, tar: 0.593930 
l0: 0.379206, l1: 0.383786, l2: 0.391911, l3: 0.409698, l4: 0.452747, l5: 0.594244, l6: 1.029823

[epoch:  15/100, batch:   140/  792, ite: 43294] train loss: 5.572176, tar: 0.593764 
l0: 0.432318, l1: 0.439624, l2: 0.439068, l3: 0.426358, l4: 0.445705, l5: 0.492658, l6: 0.667461

[epoch:  15/100, batch:   142/  792, ite: 43295] train loss: 5.570983, tar: 0.593639 
l0: 0.465885, l1: 0.466090, l2: 0.463980, l3: 0.477746, l4: 0.496388, l5: 0.577530, l6: 0.690536

[epoch:  15/100, batch:   144/  792, ite: 43296] train loss: 5.570046, tar: 0.59

[epoch:  15/100, batch:   224/  792, ite: 43336] train loss: 5.578768, tar: 0.594861 
l0: 0.665597, l1: 0.659559, l2: 0.658971, l3: 0.675451, l4: 0.671490, l5: 0.839252, l6: 0.913154

[epoch:  15/100, batch:   226/  792, ite: 43337] train loss: 5.579082, tar: 0.594913 
l0: 0.491819, l1: 0.490128, l2: 0.501378, l3: 0.528770, l4: 0.586685, l5: 0.779218, l6: 1.113722

[epoch:  15/100, batch:   228/  792, ite: 43338] train loss: 5.579189, tar: 0.594836 
l0: 0.697054, l1: 0.704192, l2: 0.696276, l3: 0.707189, l4: 0.746897, l5: 0.805413, l6: 0.956449

[epoch:  15/100, batch:   230/  792, ite: 43339] train loss: 5.579785, tar: 0.594913 
l0: 0.480163, l1: 0.486138, l2: 0.484543, l3: 0.483285, l4: 0.531351, l5: 0.603723, l6: 0.656242

[epoch:  15/100, batch:   232/  792, ite: 43340] train loss: 5.578953, tar: 0.594827 
l0: 0.489726, l1: 0.485587, l2: 0.481356, l3: 0.484944, l4: 0.520257, l5: 0.591418, l6: 0.738314

[epoch:  15/100, batch:   234/  792, ite: 43341] train loss: 5.578214, tar: 0.59

[epoch:  15/100, batch:   314/  792, ite: 43381] train loss: 5.570182, tar: 0.593948 
l0: 0.384979, l1: 0.388786, l2: 0.389411, l3: 0.382494, l4: 0.380569, l5: 0.513419, l6: 0.612260

[epoch:  15/100, batch:   316/  792, ite: 43382] train loss: 5.568800, tar: 0.593797 
l0: 0.332960, l1: 0.333769, l2: 0.332767, l3: 0.337511, l4: 0.371285, l5: 0.452972, l6: 0.515611

[epoch:  15/100, batch:   318/  792, ite: 43383] train loss: 5.567098, tar: 0.593608 
l0: 0.398270, l1: 0.404869, l2: 0.402280, l3: 0.405199, l4: 0.425683, l5: 0.611801, l6: 0.784340

[epoch:  15/100, batch:   320/  792, ite: 43384] train loss: 5.566091, tar: 0.593467 
l0: 0.432424, l1: 0.435372, l2: 0.432722, l3: 0.436018, l4: 0.472840, l5: 0.570920, l6: 0.855333

[epoch:  15/100, batch:   322/  792, ite: 43385] train loss: 5.565422, tar: 0.593351 
l0: 0.859506, l1: 0.860390, l2: 0.863297, l3: 0.857171, l4: 0.876460, l5: 0.993610, l6: 1.254787

[epoch:  15/100, batch:   324/  792, ite: 43386] train loss: 5.567050, tar: 0.59

[epoch:  15/100, batch:   404/  792, ite: 43426] train loss: 5.554589, tar: 0.592030 
l0: 0.697970, l1: 0.704478, l2: 0.706221, l3: 0.709974, l4: 0.775296, l5: 0.927776, l6: 1.003948

[epoch:  15/100, batch:   406/  792, ite: 43427] train loss: 5.555272, tar: 0.592104 
l0: 0.523262, l1: 0.527786, l2: 0.527291, l3: 0.524346, l4: 0.542217, l5: 0.580893, l6: 0.753999

[epoch:  15/100, batch:   408/  792, ite: 43428] train loss: 5.554722, tar: 0.592056 
l0: 0.482084, l1: 0.486081, l2: 0.491137, l3: 0.499874, l4: 0.552514, l5: 0.661661, l6: 0.776332

[epoch:  15/100, batch:   410/  792, ite: 43429] train loss: 5.554136, tar: 0.591979 
l0: 0.467222, l1: 0.472029, l2: 0.469726, l3: 0.484238, l4: 0.560151, l5: 0.723781, l6: 0.923073

[epoch:  15/100, batch:   412/  792, ite: 43430] train loss: 5.553664, tar: 0.591891 
l0: 0.649170, l1: 0.643738, l2: 0.636131, l3: 0.643083, l4: 0.703986, l5: 0.878483, l6: 1.077976

[epoch:  15/100, batch:   414/  792, ite: 43431] train loss: 5.554299, tar: 0.59

[epoch:  15/100, batch:   494/  792, ite: 43471] train loss: 5.548822, tar: 0.591223 
l0: 0.269491, l1: 0.271725, l2: 0.269205, l3: 0.282442, l4: 0.300342, l5: 0.381726, l6: 0.466588

[epoch:  15/100, batch:   496/  792, ite: 43472] train loss: 5.546978, tar: 0.591005 
l0: 0.511954, l1: 0.521290, l2: 0.516588, l3: 0.511438, l4: 0.560775, l5: 0.683033, l6: 0.928530

[epoch:  15/100, batch:   498/  792, ite: 43473] train loss: 5.546810, tar: 0.590951 
l0: 0.404317, l1: 0.406854, l2: 0.408115, l3: 0.410556, l4: 0.448898, l5: 0.569139, l6: 0.762297

[epoch:  15/100, batch:   500/  792, ite: 43474] train loss: 5.545913, tar: 0.590824 
l0: 0.416060, l1: 0.422061, l2: 0.421152, l3: 0.419922, l4: 0.446029, l5: 0.559706, l6: 0.707941

[epoch:  15/100, batch:   502/  792, ite: 43475] train loss: 5.545022, tar: 0.590706 
l0: 0.782477, l1: 0.787561, l2: 0.782958, l3: 0.790306, l4: 0.816875, l5: 0.908187, l6: 1.081865

[epoch:  15/100, batch:   504/  792, ite: 43476] train loss: 5.546068, tar: 0.59

[epoch:  15/100, batch:   584/  792, ite: 43516] train loss: 5.544304, tar: 0.590719 
l0: 0.601730, l1: 0.597347, l2: 0.598969, l3: 0.599975, l4: 0.639500, l5: 0.688630, l6: 0.832288

[epoch:  15/100, batch:   586/  792, ite: 43517] train loss: 5.544249, tar: 0.590726 
l0: 0.732921, l1: 0.731452, l2: 0.730523, l3: 0.728775, l4: 0.739597, l5: 0.770542, l6: 0.955244

[epoch:  15/100, batch:   588/  792, ite: 43518] train loss: 5.544799, tar: 0.590820 
l0: 0.742300, l1: 0.750238, l2: 0.751310, l3: 0.758085, l4: 0.812018, l5: 0.800380, l6: 1.123177

[epoch:  15/100, batch:   590/  792, ite: 43519] train loss: 5.545603, tar: 0.590920 
l0: 0.464913, l1: 0.465398, l2: 0.462665, l3: 0.464807, l4: 0.471679, l5: 0.554540, l6: 0.672326

[epoch:  15/100, batch:   592/  792, ite: 43520] train loss: 5.544818, tar: 0.590837 
l0: 0.809429, l1: 0.817889, l2: 0.816795, l3: 0.822945, l4: 0.848010, l5: 0.851058, l6: 1.066367

[epoch:  15/100, batch:   594/  792, ite: 43521] train loss: 5.546039, tar: 0.59

[epoch:  15/100, batch:   674/  792, ite: 43561] train loss: 5.569670, tar: 0.593925 
l0: 0.388481, l1: 0.392674, l2: 0.388365, l3: 0.393167, l4: 0.419554, l5: 0.503477, l6: 0.747847

[epoch:  15/100, batch:   676/  792, ite: 43562] train loss: 5.568688, tar: 0.593794 
l0: 0.479722, l1: 0.484963, l2: 0.485499, l3: 0.494624, l4: 0.543097, l5: 0.619751, l6: 0.729854

[epoch:  15/100, batch:   678/  792, ite: 43563] train loss: 5.568087, tar: 0.593721 
l0: 0.464095, l1: 0.468115, l2: 0.473907, l3: 0.494355, l4: 0.522779, l5: 0.615781, l6: 1.016118

[epoch:  15/100, batch:   680/  792, ite: 43564] train loss: 5.567741, tar: 0.593638 
l0: 0.347152, l1: 0.351202, l2: 0.345346, l3: 0.335645, l4: 0.336814, l5: 0.379028, l6: 0.497529

[epoch:  15/100, batch:   682/  792, ite: 43565] train loss: 5.566210, tar: 0.593480 
l0: 0.543085, l1: 0.544969, l2: 0.545841, l3: 0.554239, l4: 0.584634, l5: 0.609064, l6: 0.654544

[epoch:  15/100, batch:   684/  792, ite: 43566] train loss: 5.565685, tar: 0.59

[epoch:  15/100, batch:   764/  792, ite: 43606] train loss: 5.570596, tar: 0.594336 
l0: 0.440207, l1: 0.446970, l2: 0.449031, l3: 0.456191, l4: 0.474834, l5: 0.569959, l6: 0.722670

[epoch:  15/100, batch:   766/  792, ite: 43607] train loss: 5.569842, tar: 0.594240 
l0: 0.630022, l1: 0.636769, l2: 0.632647, l3: 0.637608, l4: 0.637140, l5: 0.674512, l6: 0.774079

[epoch:  15/100, batch:   768/  792, ite: 43608] train loss: 5.569780, tar: 0.594262 
l0: 0.733913, l1: 0.733289, l2: 0.731429, l3: 0.740507, l4: 0.779603, l5: 0.849620, l6: 1.181005

[epoch:  15/100, batch:   770/  792, ite: 43609] train loss: 5.570663, tar: 0.594349 
l0: 0.363622, l1: 0.367342, l2: 0.367920, l3: 0.372946, l4: 0.418099, l5: 0.532605, l6: 0.788239

[epoch:  15/100, batch:   772/  792, ite: 43610] train loss: 5.569657, tar: 0.594206 
l0: 0.520361, l1: 0.518881, l2: 0.517200, l3: 0.507457, l4: 0.515828, l5: 0.604113, l6: 0.725183

[epoch:  15/100, batch:   774/  792, ite: 43611] train loss: 5.569158, tar: 0.59

l0: 0.545572, l1: 0.548726, l2: 0.553116, l3: 0.566372, l4: 0.547591, l5: 0.613238, l6: 0.684887

[epoch:  16/100, batch:    62/  792, ite: 43651] train loss: 5.572550, tar: 0.594677 
l0: 0.339460, l1: 0.336296, l2: 0.333361, l3: 0.330433, l4: 0.355280, l5: 0.451171, l6: 0.602748

[epoch:  16/100, batch:    64/  792, ite: 43652] train loss: 5.571219, tar: 0.594523 
l0: 0.401382, l1: 0.404186, l2: 0.403988, l3: 0.425915, l4: 0.443762, l5: 0.519410, l6: 0.784303

[epoch:  16/100, batch:    66/  792, ite: 43653] train loss: 5.570454, tar: 0.594406 
l0: 0.841415, l1: 0.847646, l2: 0.855083, l3: 0.852348, l4: 0.889061, l5: 0.956089, l6: 1.182871

[epoch:  16/100, batch:    68/  792, ite: 43654] train loss: 5.571833, tar: 0.594555 
l0: 0.704152, l1: 0.712331, l2: 0.711187, l3: 0.706169, l4: 0.738238, l5: 0.829032, l6: 0.926026

[epoch:  16/100, batch:    70/  792, ite: 43655] train loss: 5.572288, tar: 0.594621 
l0: 0.506850, l1: 0.509001, l2: 0.509590, l3: 0.518025, l4: 0.573907, l5: 0.6997

[epoch:  16/100, batch:   150/  792, ite: 43695] train loss: 5.566946, tar: 0.594090 
l0: 0.326237, l1: 0.323406, l2: 0.326050, l3: 0.347435, l4: 0.360668, l5: 0.459877, l6: 0.608726

[epoch:  16/100, batch:   152/  792, ite: 43696] train loss: 5.565672, tar: 0.593932 
l0: 0.666463, l1: 0.671092, l2: 0.669688, l3: 0.679211, l4: 0.712581, l5: 0.759212, l6: 0.906263

[epoch:  16/100, batch:   154/  792, ite: 43697] train loss: 5.565957, tar: 0.593974 
l0: 0.487658, l1: 0.490557, l2: 0.489905, l3: 0.497305, l4: 0.534935, l5: 0.589292, l6: 0.794742

[epoch:  16/100, batch:   156/  792, ite: 43698] train loss: 5.565447, tar: 0.593912 
l0: 0.993049, l1: 0.989421, l2: 0.994290, l3: 1.001472, l4: 1.038218, l5: 1.079476, l6: 1.033229

[epoch:  16/100, batch:   158/  792, ite: 43699] train loss: 5.567053, tar: 0.594147 
l0: 0.829198, l1: 0.835610, l2: 0.827519, l3: 0.821921, l4: 0.845156, l5: 0.908714, l6: 1.021379

[epoch:  16/100, batch:   160/  792, ite: 43700] train loss: 5.568034, tar: 0.59

[epoch:  16/100, batch:   240/  792, ite: 43740] train loss: 5.559266, tar: 0.593050 
l0: 0.395010, l1: 0.404422, l2: 0.400738, l3: 0.397245, l4: 0.445781, l5: 0.536037, l6: 0.763673

[epoch:  16/100, batch:   242/  792, ite: 43741] train loss: 5.558479, tar: 0.592936 
l0: 0.264021, l1: 0.269433, l2: 0.269193, l3: 0.275627, l4: 0.313368, l5: 0.409243, l6: 0.509609

[epoch:  16/100, batch:   244/  792, ite: 43742] train loss: 5.556932, tar: 0.592747 
l0: 0.388449, l1: 0.386611, l2: 0.387345, l3: 0.393700, l4: 0.426544, l5: 0.469298, l6: 0.522511

[epoch:  16/100, batch:   246/  792, ite: 43743] train loss: 5.555788, tar: 0.592630 
l0: 0.213501, l1: 0.218726, l2: 0.218240, l3: 0.214857, l4: 0.243515, l5: 0.356007, l6: 0.416709

[epoch:  16/100, batch:   248/  792, ite: 43744] train loss: 5.553946, tar: 0.592412 
l0: 0.539775, l1: 0.543504, l2: 0.547517, l3: 0.552530, l4: 0.539071, l5: 0.595846, l6: 0.718555

[epoch:  16/100, batch:   250/  792, ite: 43745] train loss: 5.553527, tar: 0.59

[epoch:  16/100, batch:   330/  792, ite: 43785] train loss: 5.548608, tar: 0.591732 
l0: 0.540606, l1: 0.541487, l2: 0.539360, l3: 0.539444, l4: 0.552008, l5: 0.646567, l6: 0.642827

[epoch:  16/100, batch:   332/  792, ite: 43786] train loss: 5.548166, tar: 0.591704 
l0: 0.409309, l1: 0.412872, l2: 0.414115, l3: 0.414391, l4: 0.446040, l5: 0.623120, l6: 0.882709

[epoch:  16/100, batch:   334/  792, ite: 43787] train loss: 5.547602, tar: 0.591602 
l0: 0.481402, l1: 0.491822, l2: 0.490315, l3: 0.506256, l4: 0.562777, l5: 0.691187, l6: 0.833476

[epoch:  16/100, batch:   336/  792, ite: 43788] train loss: 5.547270, tar: 0.591540 
l0: 0.627935, l1: 0.640164, l2: 0.633978, l3: 0.639854, l4: 0.669091, l5: 0.815610, l6: 1.054902

[epoch:  16/100, batch:   338/  792, ite: 43789] train loss: 5.547619, tar: 0.591560 
l0: 0.294117, l1: 0.297575, l2: 0.294483, l3: 0.310074, l4: 0.340338, l5: 0.406272, l6: 0.575911

[epoch:  16/100, batch:   340/  792, ite: 43790] train loss: 5.546268, tar: 0.59

[epoch:  16/100, batch:   420/  792, ite: 43830] train loss: 5.540634, tar: 0.590678 
l0: 1.108802, l1: 1.102164, l2: 1.104163, l3: 1.119176, l4: 1.125641, l5: 1.159968, l6: 1.307214

[epoch:  16/100, batch:   422/  792, ite: 43831] train loss: 5.542678, tar: 0.590961 
l0: 1.075189, l1: 1.073136, l2: 1.072906, l3: 1.084413, l4: 1.124666, l5: 1.205239, l6: 1.158792

[epoch:  16/100, batch:   424/  792, ite: 43832] train loss: 5.544601, tar: 0.591225 
l0: 0.473633, l1: 0.480223, l2: 0.478599, l3: 0.495850, l4: 0.552939, l5: 0.728724, l6: 0.923364

[epoch:  16/100, batch:   426/  792, ite: 43833] train loss: 5.544404, tar: 0.591161 
l0: 0.872381, l1: 0.861668, l2: 0.864055, l3: 0.856227, l4: 0.897064, l5: 0.922583, l6: 1.043244

[epoch:  16/100, batch:   428/  792, ite: 43834] train loss: 5.545457, tar: 0.591314 
l0: 0.951271, l1: 0.955740, l2: 0.954789, l3: 0.968111, l4: 0.983145, l5: 1.001472, l6: 1.185639

[epoch:  16/100, batch:   430/  792, ite: 43835] train loss: 5.546991, tar: 0.59

[epoch:  16/100, batch:   510/  792, ite: 43875] train loss: 5.545954, tar: 0.591440 
l0: 0.833727, l1: 0.839816, l2: 0.838249, l3: 0.840621, l4: 0.872185, l5: 0.932293, l6: 1.180023

[epoch:  16/100, batch:   512/  792, ite: 43876] train loss: 5.547067, tar: 0.591569 
l0: 0.294404, l1: 0.298113, l2: 0.298986, l3: 0.316570, l4: 0.379281, l5: 0.525904, l6: 0.738081

[epoch:  16/100, batch:   514/  792, ite: 43877] train loss: 5.546033, tar: 0.591411 
l0: 0.665297, l1: 0.671168, l2: 0.672496, l3: 0.667127, l4: 0.687796, l5: 0.786574, l6: 0.966735

[epoch:  16/100, batch:   516/  792, ite: 43878] train loss: 5.546351, tar: 0.591450 
l0: 0.542253, l1: 0.552117, l2: 0.559419, l3: 0.565877, l4: 0.571426, l5: 0.659683, l6: 0.768966

[epoch:  16/100, batch:   518/  792, ite: 43879] train loss: 5.546122, tar: 0.591424 
l0: 1.882227, l1: 1.901803, l2: 1.889082, l3: 1.905791, l4: 1.937145, l5: 2.160661, l6: 2.779049

[epoch:  16/100, batch:   520/  792, ite: 43880] train loss: 5.552399, tar: 0.59

[epoch:  16/100, batch:   600/  792, ite: 43920] train loss: 5.556491, tar: 0.592656 
l0: 0.443386, l1: 0.446479, l2: 0.450259, l3: 0.447488, l4: 0.502863, l5: 0.598111, l6: 0.787178

[epoch:  16/100, batch:   602/  792, ite: 43921] train loss: 5.555973, tar: 0.592578 
l0: 0.557958, l1: 0.557225, l2: 0.563760, l3: 0.571272, l4: 0.604764, l5: 0.687110, l6: 1.116558

[epoch:  16/100, batch:   604/  792, ite: 43922] train loss: 5.556119, tar: 0.592560 
l0: 0.523386, l1: 0.526410, l2: 0.529607, l3: 0.536474, l4: 0.549292, l5: 0.655918, l6: 0.776705

[epoch:  16/100, batch:   606/  792, ite: 43923] train loss: 5.555821, tar: 0.592524 
l0: 0.467239, l1: 0.464752, l2: 0.464400, l3: 0.485557, l4: 0.545086, l5: 0.563651, l6: 0.730169

[epoch:  16/100, batch:   608/  792, ite: 43924] train loss: 5.555274, tar: 0.592459 
l0: 1.194386, l1: 1.198950, l2: 1.192189, l3: 1.208526, l4: 1.275603, l5: 1.314440, l6: 1.265202

[epoch:  16/100, batch:   610/  792, ite: 43925] train loss: 5.557587, tar: 0.59

[epoch:  16/100, batch:   690/  792, ite: 43965] train loss: 5.560277, tar: 0.593091 
l0: 0.558862, l1: 0.555668, l2: 0.542289, l3: 0.513150, l4: 0.532322, l5: 0.616085, l6: 0.715843

[epoch:  16/100, batch:   692/  792, ite: 43966] train loss: 5.559936, tar: 0.593073 
l0: 0.580195, l1: 0.590654, l2: 0.595825, l3: 0.599005, l4: 0.636964, l5: 0.697131, l6: 0.805097

[epoch:  16/100, batch:   694/  792, ite: 43967] train loss: 5.559780, tar: 0.593067 
l0: 0.568686, l1: 0.570126, l2: 0.572167, l3: 0.579789, l4: 0.617530, l5: 0.800850, l6: 1.111991

[epoch:  16/100, batch:   696/  792, ite: 43968] train loss: 5.559974, tar: 0.593054 
l0: 0.452662, l1: 0.457626, l2: 0.455864, l3: 0.464533, l4: 0.511221, l5: 0.540525, l6: 0.751949

[epoch:  16/100, batch:   698/  792, ite: 43969] train loss: 5.559362, tar: 0.592983 
l0: 0.825554, l1: 0.829280, l2: 0.832866, l3: 0.853413, l4: 0.906578, l5: 1.051850, l6: 1.344648

[epoch:  16/100, batch:   700/  792, ite: 43970] train loss: 5.560679, tar: 0.59

[epoch:  16/100, batch:   780/  792, ite: 44010] train loss: 5.545481, tar: 0.586807 
l0: 0.834721, l1: 0.831415, l2: 0.828487, l3: 0.822833, l4: 0.823625, l5: 0.892119, l6: 1.200437

[epoch:  16/100, batch:   782/  792, ite: 44011] train loss: 5.727724, tar: 0.609345 
l0: 0.829060, l1: 0.835796, l2: 0.838929, l3: 0.832757, l4: 0.845047, l5: 0.846721, l6: 0.892170

[epoch:  16/100, batch:   784/  792, ite: 44012] train loss: 5.822391, tar: 0.627654 
l0: 1.183022, l1: 1.185467, l2: 1.178458, l3: 1.183200, l4: 1.186752, l5: 1.223588, l6: 1.304731

[epoch:  16/100, batch:   786/  792, ite: 44013] train loss: 6.133211, tar: 0.670375 
l0: 0.387245, l1: 0.393358, l2: 0.394179, l3: 0.397652, l4: 0.408531, l5: 0.513794, l6: 0.617820

[epoch:  16/100, batch:   788/  792, ite: 44014] train loss: 5.963929, tar: 0.650152 
l0: 0.305322, l1: 0.306497, l2: 0.308064, l3: 0.313216, l4: 0.346986, l5: 0.489460, l6: 0.590078

[epoch:  16/100, batch:   790/  792, ite: 44015] train loss: 5.783322, tar: 0.62

l0: 0.223793, l1: 0.230077, l2: 0.230687, l3: 0.241526, l4: 0.250167, l5: 0.347015, l6: 0.440554

[epoch:  17/100, batch:    78/  792, ite: 44055] train loss: 5.429667, tar: 0.573540 
l0: 0.823283, l1: 0.829065, l2: 0.829345, l3: 0.843047, l4: 0.874370, l5: 0.881483, l6: 0.935587

[epoch:  17/100, batch:    80/  792, ite: 44056] train loss: 5.458554, tar: 0.577999 
l0: 0.616710, l1: 0.630533, l2: 0.627495, l3: 0.630924, l4: 0.634311, l5: 0.709406, l6: 0.854219

[epoch:  17/100, batch:    82/  792, ite: 44057] train loss: 5.462927, tar: 0.578679 
l0: 0.516218, l1: 0.519097, l2: 0.517412, l3: 0.515304, l4: 0.541849, l5: 0.645213, l6: 0.847878

[epoch:  17/100, batch:    84/  792, ite: 44058] train loss: 5.453723, tar: 0.577602 
l0: 0.755681, l1: 0.760029, l2: 0.759440, l3: 0.762968, l4: 0.780864, l5: 0.807692, l6: 1.069123

[epoch:  17/100, batch:    86/  792, ite: 44059] train loss: 5.479601, tar: 0.580620 
l0: 0.476766, l1: 0.485517, l2: 0.481616, l3: 0.488191, l4: 0.510977, l5: 0.6540

[epoch:  17/100, batch:   166/  792, ite: 44099] train loss: 5.587545, tar: 0.598744 
l0: 0.684429, l1: 0.682696, l2: 0.688506, l3: 0.694609, l4: 0.717141, l5: 0.811925, l6: 1.026449

[epoch:  17/100, batch:   168/  792, ite: 44100] train loss: 5.595756, tar: 0.599601 
l0: 0.434334, l1: 0.435144, l2: 0.434751, l3: 0.438443, l4: 0.498407, l5: 0.666588, l6: 0.773767

[epoch:  17/100, batch:   170/  792, ite: 44101] train loss: 5.585100, tar: 0.597964 
l0: 0.363960, l1: 0.365836, l2: 0.368218, l3: 0.377048, l4: 0.437610, l5: 0.653429, l6: 0.863572

[epoch:  17/100, batch:   172/  792, ite: 44102] train loss: 5.573297, tar: 0.595670 
l0: 0.449292, l1: 0.442539, l2: 0.439328, l3: 0.447784, l4: 0.469373, l5: 0.656976, l6: 0.820701

[epoch:  17/100, batch:   174/  792, ite: 44103] train loss: 5.564233, tar: 0.594249 
l0: 0.463892, l1: 0.473130, l2: 0.466206, l3: 0.475214, l4: 0.520027, l5: 0.645253, l6: 0.795247

[epoch:  17/100, batch:   176/  792, ite: 44104] train loss: 5.555793, tar: 0.59

[epoch:  17/100, batch:   256/  792, ite: 44144] train loss: 5.419414, tar: 0.575396 
l0: 0.489537, l1: 0.488262, l2: 0.489121, l3: 0.503053, l4: 0.545034, l5: 0.615569, l6: 0.796453

[epoch:  17/100, batch:   258/  792, ite: 44145] train loss: 5.415786, tar: 0.574803 
l0: 0.542202, l1: 0.549785, l2: 0.548994, l3: 0.542686, l4: 0.544402, l5: 0.671187, l6: 0.885539

[epoch:  17/100, batch:   260/  792, ite: 44146] train loss: 5.414122, tar: 0.574580 
l0: 0.587977, l1: 0.589779, l2: 0.583309, l3: 0.577161, l4: 0.607068, l5: 0.676418, l6: 0.803386

[epoch:  17/100, batch:   262/  792, ite: 44147] train loss: 5.413798, tar: 0.574671 
l0: 0.541755, l1: 0.541716, l2: 0.543539, l3: 0.548963, l4: 0.577848, l5: 0.722226, l6: 1.247403

[epoch:  17/100, batch:   264/  792, ite: 44148] train loss: 5.417194, tar: 0.574449 
l0: 0.487250, l1: 0.488996, l2: 0.486878, l3: 0.487557, l4: 0.535933, l5: 0.587767, l6: 0.642458

[epoch:  17/100, batch:   266/  792, ite: 44149] train loss: 5.410334, tar: 0.57

[epoch:  17/100, batch:   346/  792, ite: 44189] train loss: 5.409353, tar: 0.574536 
l0: 0.759340, l1: 0.757043, l2: 0.760895, l3: 0.782301, l4: 0.803861, l5: 0.920979, l6: 1.152502

[epoch:  17/100, batch:   348/  792, ite: 44190] train loss: 5.418928, tar: 0.575509 
l0: 0.691533, l1: 0.723333, l2: 0.737168, l3: 0.730566, l4: 0.767223, l5: 0.888788, l6: 1.280770

[epoch:  17/100, batch:   350/  792, ite: 44191] train loss: 5.427782, tar: 0.576116 
l0: 0.897706, l1: 0.898421, l2: 0.897486, l3: 0.892461, l4: 0.927314, l5: 0.984355, l6: 0.912827

[epoch:  17/100, batch:   352/  792, ite: 44192] train loss: 5.437934, tar: 0.577791 
l0: 0.557101, l1: 0.560951, l2: 0.558719, l3: 0.551558, l4: 0.578114, l5: 0.691425, l6: 0.969245

[epoch:  17/100, batch:   354/  792, ite: 44193] train loss: 5.438439, tar: 0.577684 
l0: 0.497922, l1: 0.509411, l2: 0.509145, l3: 0.511065, l4: 0.544080, l5: 0.610740, l6: 0.885592

[epoch:  17/100, batch:   356/  792, ite: 44194] train loss: 5.436426, tar: 0.57

[epoch:  17/100, batch:   436/  792, ite: 44234] train loss: 5.505161, tar: 0.584673 
l0: 0.554852, l1: 0.567654, l2: 0.570646, l3: 0.585565, l4: 0.605429, l5: 0.713655, l6: 0.865153

[epoch:  17/100, batch:   438/  792, ite: 44235] train loss: 5.504598, tar: 0.584546 
l0: 0.779128, l1: 0.778139, l2: 0.777545, l3: 0.776861, l4: 0.826896, l5: 0.955899, l6: 1.195174

[epoch:  17/100, batch:   440/  792, ite: 44236] train loss: 5.512324, tar: 0.585370 
l0: 0.447076, l1: 0.447263, l2: 0.449831, l3: 0.455548, l4: 0.486315, l5: 0.533322, l6: 0.657592

[epoch:  17/100, batch:   442/  792, ite: 44237] train loss: 5.506911, tar: 0.584787 
l0: 0.742966, l1: 0.746821, l2: 0.744439, l3: 0.740198, l4: 0.738562, l5: 0.798607, l6: 0.823477

[epoch:  17/100, batch:   444/  792, ite: 44238] train loss: 5.510034, tar: 0.585451 
l0: 0.452142, l1: 0.465169, l2: 0.466389, l3: 0.478099, l4: 0.501808, l5: 0.612787, l6: 0.758144

[epoch:  17/100, batch:   446/  792, ite: 44239] train loss: 5.505996, tar: 0.58

[epoch:  17/100, batch:   526/  792, ite: 44279] train loss: 5.483647, tar: 0.582427 
l0: 0.584387, l1: 0.590957, l2: 0.595192, l3: 0.588794, l4: 0.603355, l5: 0.596364, l6: 0.616989

[epoch:  17/100, batch:   528/  792, ite: 44280] train loss: 5.481481, tar: 0.582434 
l0: 0.460644, l1: 0.468701, l2: 0.464498, l3: 0.454307, l4: 0.474108, l5: 0.537029, l6: 0.801547

[epoch:  17/100, batch:   530/  792, ite: 44281] train loss: 5.478069, tar: 0.582001 
l0: 0.485389, l1: 0.483642, l2: 0.485511, l3: 0.491032, l4: 0.481745, l5: 0.526451, l6: 0.608488

[epoch:  17/100, batch:   532/  792, ite: 44282] train loss: 5.473648, tar: 0.581658 
l0: 0.623300, l1: 0.625888, l2: 0.625773, l3: 0.614695, l4: 0.600021, l5: 0.669405, l6: 0.815804

[epoch:  17/100, batch:   534/  792, ite: 44283] train loss: 5.473568, tar: 0.581805 
l0: 0.600591, l1: 0.603706, l2: 0.594728, l3: 0.588839, l4: 0.624207, l5: 0.759264, l6: 0.940212

[epoch:  17/100, batch:   536/  792, ite: 44284] train loss: 5.474636, tar: 0.58

[epoch:  17/100, batch:   616/  792, ite: 44324] train loss: 5.471511, tar: 0.581616 
l0: 0.197696, l1: 0.204223, l2: 0.207086, l3: 0.222805, l4: 0.256317, l5: 0.318361, l6: 0.424955

[epoch:  17/100, batch:   618/  792, ite: 44325] train loss: 5.461599, tar: 0.580434 
l0: 0.732215, l1: 0.730677, l2: 0.729479, l3: 0.736332, l4: 0.789354, l5: 0.766161, l6: 0.876653

[epoch:  17/100, batch:   620/  792, ite: 44326] train loss: 5.464236, tar: 0.580900 
l0: 1.113359, l1: 1.131288, l2: 1.131939, l3: 1.135199, l4: 1.162560, l5: 1.312757, l6: 1.802976

[epoch:  17/100, batch:   622/  792, ite: 44327] train loss: 5.480452, tar: 0.582528 
l0: 0.221007, l1: 0.228733, l2: 0.218804, l3: 0.218704, l4: 0.262555, l5: 0.366898, l6: 0.408405

[epoch:  17/100, batch:   624/  792, ite: 44328] train loss: 5.471096, tar: 0.581426 
l0: 1.113241, l1: 1.104721, l2: 1.088972, l3: 1.078346, l4: 1.098745, l5: 1.126842, l6: 1.149577

[epoch:  17/100, batch:   626/  792, ite: 44329] train loss: 5.481946, tar: 0.58

[epoch:  17/100, batch:   706/  792, ite: 44369] train loss: 5.520726, tar: 0.587801 
l0: 0.405125, l1: 0.408831, l2: 0.407672, l3: 0.418127, l4: 0.447329, l5: 0.598637, l6: 0.753805

[epoch:  17/100, batch:   708/  792, ite: 44370] train loss: 5.517085, tar: 0.587307 
l0: 0.365139, l1: 0.368164, l2: 0.368666, l3: 0.378149, l4: 0.424142, l5: 0.509742, l6: 0.644363

[epoch:  17/100, batch:   710/  792, ite: 44371] train loss: 5.512664, tar: 0.586709 
l0: 0.737985, l1: 0.737932, l2: 0.732089, l3: 0.735741, l4: 0.761594, l5: 0.827059, l6: 0.951894

[epoch:  17/100, batch:   712/  792, ite: 44372] train loss: 5.515168, tar: 0.587115 
l0: 0.637520, l1: 0.634519, l2: 0.637634, l3: 0.653882, l4: 0.681984, l5: 0.784531, l6: 1.119264

[epoch:  17/100, batch:   714/  792, ite: 44373] train loss: 5.517475, tar: 0.587250 
l0: 0.377449, l1: 0.376871, l2: 0.378812, l3: 0.407084, l4: 0.469155, l5: 0.524743, l6: 0.631037

[epoch:  17/100, batch:   716/  792, ite: 44374] train loss: 5.513105, tar: 0.58

l0: 0.500820, l1: 0.501955, l2: 0.498785, l3: 0.492066, l4: 0.513480, l5: 0.616059, l6: 0.784070

[epoch:  18/100, batch:     4/  792, ite: 44414] train loss: 5.462736, tar: 0.580971 
l0: 0.858860, l1: 0.867567, l2: 0.873171, l3: 0.891024, l4: 0.907728, l5: 1.003152, l6: 1.077261

[epoch:  18/100, batch:     6/  792, ite: 44415] train loss: 5.467677, tar: 0.581640 
l0: 1.188710, l1: 1.177030, l2: 1.176280, l3: 1.181972, l4: 1.195562, l5: 1.322576, l6: 1.426481

[epoch:  18/100, batch:     8/  792, ite: 44416] train loss: 5.478826, tar: 0.583100 
l0: 0.440318, l1: 0.437321, l2: 0.434574, l3: 0.436216, l4: 0.485602, l5: 0.649151, l6: 0.829553

[epoch:  18/100, batch:    10/  792, ite: 44417] train loss: 5.476412, tar: 0.582757 
l0: 0.466645, l1: 0.475134, l2: 0.470435, l3: 0.462380, l4: 0.503964, l5: 0.602508, l6: 0.800107

[epoch:  18/100, batch:    12/  792, ite: 44418] train loss: 5.474525, tar: 0.582479 
l0: 0.565507, l1: 0.567452, l2: 0.566865, l3: 0.567273, l4: 0.576761, l5: 0.6143

[epoch:  18/100, batch:    92/  792, ite: 44458] train loss: 5.457871, tar: 0.580988 
l0: 0.594086, l1: 0.592645, l2: 0.589548, l3: 0.604069, l4: 0.670352, l5: 0.719587, l6: 0.866139

[epoch:  18/100, batch:    94/  792, ite: 44459] train loss: 5.458035, tar: 0.581017 
l0: 0.404743, l1: 0.400888, l2: 0.398624, l3: 0.405609, l4: 0.425880, l5: 0.479594, l6: 0.642898

[epoch:  18/100, batch:    96/  792, ite: 44460] train loss: 5.454444, tar: 0.580634 
l0: 0.565421, l1: 0.561036, l2: 0.559869, l3: 0.570298, l4: 0.625001, l5: 0.649743, l6: 0.813628

[epoch:  18/100, batch:    98/  792, ite: 44461] train loss: 5.454046, tar: 0.580601 
l0: 0.462673, l1: 0.461516, l2: 0.457886, l3: 0.474117, l4: 0.474131, l5: 0.567758, l6: 0.793772

[epoch:  18/100, batch:   100/  792, ite: 44462] train loss: 5.452125, tar: 0.580345 
l0: 0.741216, l1: 0.747216, l2: 0.745767, l3: 0.751502, l4: 0.782341, l5: 0.875847, l6: 0.958195

[epoch:  18/100, batch:   102/  792, ite: 44463] train loss: 5.454666, tar: 0.58

[epoch:  18/100, batch:   182/  792, ite: 44503] train loss: 5.422324, tar: 0.576783 
l0: 0.302920, l1: 0.306612, l2: 0.303037, l3: 0.309677, l4: 0.328692, l5: 0.418502, l6: 0.605602

[epoch:  18/100, batch:   184/  792, ite: 44504] train loss: 5.417818, tar: 0.576239 
l0: 0.991766, l1: 0.993674, l2: 0.991073, l3: 0.997186, l4: 1.006581, l5: 1.051484, l6: 1.244948

[epoch:  18/100, batch:   186/  792, ite: 44505] train loss: 5.424168, tar: 0.577062 
l0: 0.623719, l1: 0.624701, l2: 0.623393, l3: 0.617989, l4: 0.620081, l5: 0.698562, l6: 0.820883

[epoch:  18/100, batch:   188/  792, ite: 44506] train loss: 5.424542, tar: 0.577154 
l0: 0.480206, l1: 0.478560, l2: 0.476454, l3: 0.479505, l4: 0.531664, l5: 0.599817, l6: 0.904998

[epoch:  18/100, batch:   190/  792, ite: 44507] train loss: 5.423277, tar: 0.576963 
l0: 0.360826, l1: 0.364324, l2: 0.365008, l3: 0.376131, l4: 0.407172, l5: 0.603306, l6: 0.689862

[epoch:  18/100, batch:   192/  792, ite: 44508] train loss: 5.420220, tar: 0.57

[epoch:  18/100, batch:   272/  792, ite: 44548] train loss: 5.421659, tar: 0.576454 
l0: 0.320370, l1: 0.320979, l2: 0.319863, l3: 0.318201, l4: 0.355477, l5: 0.439128, l6: 0.558451

[epoch:  18/100, batch:   274/  792, ite: 44549] train loss: 5.417814, tar: 0.575988 
l0: 0.295380, l1: 0.296278, l2: 0.293772, l3: 0.303426, l4: 0.343653, l5: 0.499318, l6: 0.648911

[epoch:  18/100, batch:   276/  792, ite: 44550] train loss: 5.413883, tar: 0.575477 
l0: 0.257380, l1: 0.255593, l2: 0.255774, l3: 0.261267, l4: 0.307965, l5: 0.418200, l6: 0.633493

[epoch:  18/100, batch:   278/  792, ite: 44551] train loss: 5.409575, tar: 0.574900 
l0: 0.398473, l1: 0.398459, l2: 0.401491, l3: 0.400778, l4: 0.402072, l5: 0.454057, l6: 0.605623

[epoch:  18/100, batch:   280/  792, ite: 44552] train loss: 5.406537, tar: 0.574581 
l0: 0.502178, l1: 0.498534, l2: 0.499063, l3: 0.494388, l4: 0.517987, l5: 0.538776, l6: 0.625665

[epoch:  18/100, batch:   282/  792, ite: 44553] train loss: 5.404837, tar: 0.57

[epoch:  18/100, batch:   362/  792, ite: 44593] train loss: 5.410265, tar: 0.575115 
l0: 0.491824, l1: 0.494705, l2: 0.496703, l3: 0.511212, l4: 0.563038, l5: 0.584037, l6: 0.701713

[epoch:  18/100, batch:   364/  792, ite: 44594] train loss: 5.409001, tar: 0.574975 
l0: 0.802177, l1: 0.797418, l2: 0.795457, l3: 0.797605, l4: 0.790506, l5: 0.851140, l6: 0.924308

[epoch:  18/100, batch:   366/  792, ite: 44595] train loss: 5.411251, tar: 0.575357 
l0: 0.490479, l1: 0.494050, l2: 0.500144, l3: 0.498938, l4: 0.526726, l5: 0.785537, l6: 1.048942

[epoch:  18/100, batch:   368/  792, ite: 44596] train loss: 5.411161, tar: 0.575214 
l0: 0.521917, l1: 0.527648, l2: 0.519945, l3: 0.519765, l4: 0.516452, l5: 0.548594, l6: 0.628826

[epoch:  18/100, batch:   370/  792, ite: 44597] train loss: 5.409509, tar: 0.575125 
l0: 0.300998, l1: 0.296723, l2: 0.297061, l3: 0.299231, l4: 0.323302, l5: 0.469705, l6: 0.617165

[epoch:  18/100, batch:   372/  792, ite: 44598] train loss: 5.406077, tar: 0.57

[epoch:  18/100, batch:   452/  792, ite: 44638] train loss: 5.438749, tar: 0.578801 
l0: 0.593305, l1: 0.588092, l2: 0.586870, l3: 0.589987, l4: 0.594294, l5: 0.659456, l6: 0.967124

[epoch:  18/100, batch:   454/  792, ite: 44639] train loss: 5.438979, tar: 0.578823 
l0: 0.627398, l1: 0.632363, l2: 0.637139, l3: 0.643891, l4: 0.645463, l5: 0.675663, l6: 1.033751

[epoch:  18/100, batch:   456/  792, ite: 44640] train loss: 5.439944, tar: 0.578899 
l0: 0.806724, l1: 0.796638, l2: 0.801176, l3: 0.797220, l4: 0.813920, l5: 0.959298, l6: 1.102641

[epoch:  18/100, batch:   458/  792, ite: 44641] train loss: 5.442601, tar: 0.579255 
l0: 0.362597, l1: 0.362532, l2: 0.360995, l3: 0.356631, l4: 0.411179, l5: 0.586244, l6: 0.744617

[epoch:  18/100, batch:   460/  792, ite: 44642] train loss: 5.440247, tar: 0.578917 
l0: 0.418007, l1: 0.421578, l2: 0.423049, l3: 0.431073, l4: 0.444371, l5: 0.586692, l6: 0.935732

[epoch:  18/100, batch:   462/  792, ite: 44643] train loss: 5.439072, tar: 0.57

[epoch:  18/100, batch:   542/  792, ite: 44683] train loss: 5.434195, tar: 0.577989 
l0: 0.436960, l1: 0.438488, l2: 0.435409, l3: 0.446157, l4: 0.448997, l5: 0.511131, l6: 0.746141

[epoch:  18/100, batch:   544/  792, ite: 44684] train loss: 5.432372, tar: 0.577783 
l0: 0.797053, l1: 0.798710, l2: 0.800413, l3: 0.803604, l4: 0.819509, l5: 0.886006, l6: 0.872884

[epoch:  18/100, batch:   546/  792, ite: 44685] train loss: 5.434312, tar: 0.578103 
l0: 0.543326, l1: 0.550127, l2: 0.548283, l3: 0.541471, l4: 0.585821, l5: 0.667781, l6: 0.802269

[epoch:  18/100, batch:   548/  792, ite: 44686] train loss: 5.433837, tar: 0.578053 
l0: 0.381196, l1: 0.384999, l2: 0.386019, l3: 0.392209, l4: 0.411681, l5: 0.470828, l6: 0.610137

[epoch:  18/100, batch:   550/  792, ite: 44687] train loss: 5.431444, tar: 0.577766 
l0: 0.937482, l1: 0.938166, l2: 0.940300, l3: 0.941140, l4: 0.934665, l5: 0.958165, l6: 1.064733

[epoch:  18/100, batch:   552/  792, ite: 44688] train loss: 5.434950, tar: 0.57

[epoch:  18/100, batch:   632/  792, ite: 44728] train loss: 5.435695, tar: 0.578115 
l0: 0.423293, l1: 0.422285, l2: 0.422380, l3: 0.419037, l4: 0.458424, l5: 0.622947, l6: 0.830048

[epoch:  18/100, batch:   634/  792, ite: 44729] train loss: 5.434457, tar: 0.577903 
l0: 0.873118, l1: 0.878210, l2: 0.886809, l3: 0.885046, l4: 0.881661, l5: 0.852811, l6: 0.881779

[epoch:  18/100, batch:   636/  792, ite: 44730] train loss: 5.437104, tar: 0.578307 
l0: 0.377804, l1: 0.378775, l2: 0.378250, l3: 0.386902, l4: 0.385117, l5: 0.431963, l6: 0.557229

[epoch:  18/100, batch:   638/  792, ite: 44731] train loss: 5.434435, tar: 0.578033 
l0: 0.700215, l1: 0.708925, l2: 0.709377, l3: 0.715955, l4: 0.743429, l5: 0.755683, l6: 0.831133

[epoch:  18/100, batch:   640/  792, ite: 44732] train loss: 5.435239, tar: 0.578200 
l0: 0.463950, l1: 0.466070, l2: 0.466137, l3: 0.473532, l4: 0.516345, l5: 0.632707, l6: 0.858775

[epoch:  18/100, batch:   642/  792, ite: 44733] train loss: 5.434376, tar: 0.57

[epoch:  18/100, batch:   722/  792, ite: 44773] train loss: 5.439995, tar: 0.579125 
l0: 0.432615, l1: 0.435574, l2: 0.439148, l3: 0.434811, l4: 0.448935, l5: 0.566161, l6: 0.666943

[epoch:  18/100, batch:   724/  792, ite: 44774] train loss: 5.438361, tar: 0.578935 
l0: 0.548273, l1: 0.553915, l2: 0.546321, l3: 0.542127, l4: 0.580321, l5: 0.671306, l6: 0.751807

[epoch:  18/100, batch:   726/  792, ite: 44775] train loss: 5.437784, tar: 0.578896 
l0: 0.382054, l1: 0.376652, l2: 0.371882, l3: 0.380609, l4: 0.450514, l5: 0.548790, l6: 0.730318

[epoch:  18/100, batch:   728/  792, ite: 44776] train loss: 5.435968, tar: 0.578642 
l0: 0.240015, l1: 0.245344, l2: 0.248156, l3: 0.257587, l4: 0.284869, l5: 0.317242, l6: 0.436639

[epoch:  18/100, batch:   730/  792, ite: 44777] train loss: 5.432272, tar: 0.578206 
l0: 0.911768, l1: 0.909113, l2: 0.912711, l3: 0.920828, l4: 0.962696, l5: 1.033156, l6: 1.143038

[epoch:  18/100, batch:   732/  792, ite: 44778] train loss: 5.435322, tar: 0.57

l0: 0.473695, l1: 0.481421, l2: 0.478305, l3: 0.482022, l4: 0.512139, l5: 0.632114, l6: 0.791722

[epoch:  19/100, batch:    20/  792, ite: 44818] train loss: 5.462028, tar: 0.581469 
l0: 0.438531, l1: 0.448157, l2: 0.449923, l3: 0.465211, l4: 0.490120, l5: 0.604629, l6: 0.616501

[epoch:  19/100, batch:    22/  792, ite: 44819] train loss: 5.460503, tar: 0.581294 
l0: 0.649898, l1: 0.650223, l2: 0.646339, l3: 0.644730, l4: 0.646304, l5: 0.660391, l6: 0.727519

[epoch:  19/100, batch:    24/  792, ite: 44820] train loss: 5.460541, tar: 0.581378 
l0: 0.514622, l1: 0.517828, l2: 0.516984, l3: 0.520048, l4: 0.570772, l5: 0.641403, l6: 0.744714

[epoch:  19/100, batch:    26/  792, ite: 44821] train loss: 5.459651, tar: 0.581297 
l0: 0.442989, l1: 0.444948, l2: 0.444355, l3: 0.442581, l4: 0.455646, l5: 0.564827, l6: 0.792111

[epoch:  19/100, batch:    28/  792, ite: 44822] train loss: 5.458407, tar: 0.581129 
l0: 0.406256, l1: 0.412563, l2: 0.415460, l3: 0.417309, l4: 0.467545, l5: 0.5617

[epoch:  19/100, batch:   108/  792, ite: 44862] train loss: 5.454295, tar: 0.580638 
l0: 0.815502, l1: 0.823760, l2: 0.821367, l3: 0.835828, l4: 0.894083, l5: 0.974000, l6: 1.242586

[epoch:  19/100, batch:   110/  792, ite: 44863] train loss: 5.457045, tar: 0.580910 
l0: 0.490831, l1: 0.496200, l2: 0.495741, l3: 0.495402, l4: 0.508004, l5: 0.664953, l6: 0.844308

[epoch:  19/100, batch:   112/  792, ite: 44864] train loss: 5.456386, tar: 0.580806 
l0: 0.347556, l1: 0.355997, l2: 0.356092, l3: 0.364397, l4: 0.385099, l5: 0.584869, l6: 0.875143

[epoch:  19/100, batch:   114/  792, ite: 44865] train loss: 5.454817, tar: 0.580537 
l0: 0.561581, l1: 0.559876, l2: 0.563369, l3: 0.570853, l4: 0.594536, l5: 0.622948, l6: 0.818016

[epoch:  19/100, batch:   116/  792, ite: 44866] train loss: 5.454474, tar: 0.580515 
l0: 0.458434, l1: 0.459907, l2: 0.461524, l3: 0.475540, l4: 0.519763, l5: 0.627162, l6: 0.884218

[epoch:  19/100, batch:   118/  792, ite: 44867] train loss: 5.453724, tar: 0.58

[epoch:  19/100, batch:   198/  792, ite: 44907] train loss: 5.488533, tar: 0.584704 
l0: 0.461201, l1: 0.464165, l2: 0.458677, l3: 0.452231, l4: 0.481415, l5: 0.624131, l6: 0.798078

[epoch:  19/100, batch:   200/  792, ite: 44908] train loss: 5.487510, tar: 0.584568 
l0: 0.439438, l1: 0.446731, l2: 0.446749, l3: 0.453740, l4: 0.479067, l5: 0.656076, l6: 0.749737

[epoch:  19/100, batch:   202/  792, ite: 44909] train loss: 5.486480, tar: 0.584408 
l0: 0.530328, l1: 0.533586, l2: 0.534286, l3: 0.547736, l4: 0.553616, l5: 0.613556, l6: 0.795923

[epoch:  19/100, batch:   204/  792, ite: 44910] train loss: 5.485820, tar: 0.584349 
l0: 0.702876, l1: 0.709777, l2: 0.709717, l3: 0.696769, l4: 0.718455, l5: 0.868395, l6: 1.064602

[epoch:  19/100, batch:   206/  792, ite: 44911] train loss: 5.487121, tar: 0.584479 
l0: 0.538386, l1: 0.547356, l2: 0.553557, l3: 0.557155, l4: 0.574554, l5: 0.738835, l6: 0.820834

[epoch:  19/100, batch:   208/  792, ite: 44912] train loss: 5.486855, tar: 0.58

[epoch:  19/100, batch:   288/  792, ite: 44952] train loss: 5.478938, tar: 0.583067 
l0: 0.543500, l1: 0.545911, l2: 0.551150, l3: 0.542912, l4: 0.535043, l5: 0.593286, l6: 0.783009

[epoch:  19/100, batch:   290/  792, ite: 44953] train loss: 5.478344, tar: 0.583026 
l0: 0.827223, l1: 0.827976, l2: 0.828045, l3: 0.824320, l4: 0.847862, l5: 0.944469, l6: 1.071810

[epoch:  19/100, batch:   292/  792, ite: 44954] train loss: 5.480276, tar: 0.583282 
l0: 0.696499, l1: 0.700312, l2: 0.698470, l3: 0.699441, l4: 0.730640, l5: 0.869403, l6: 1.030371

[epoch:  19/100, batch:   294/  792, ite: 44955] train loss: 5.481113, tar: 0.583400 
l0: 0.642280, l1: 0.657493, l2: 0.659058, l3: 0.676723, l4: 0.750109, l5: 0.896365, l6: 1.235740

[epoch:  19/100, batch:   296/  792, ite: 44956] train loss: 5.482600, tar: 0.583462 
l0: 0.347832, l1: 0.349476, l2: 0.349270, l3: 0.353107, l4: 0.360459, l5: 0.429100, l6: 0.556866

[epoch:  19/100, batch:   298/  792, ite: 44957] train loss: 5.480315, tar: 0.58

[epoch:  19/100, batch:   378/  792, ite: 44997] train loss: 5.474944, tar: 0.582674 
l0: 0.368406, l1: 0.375107, l2: 0.376134, l3: 0.378623, l4: 0.395054, l5: 0.493280, l6: 0.682397

[epoch:  19/100, batch:   380/  792, ite: 44998] train loss: 5.473269, tar: 0.582459 
l0: 0.357065, l1: 0.354080, l2: 0.353443, l3: 0.357698, l4: 0.417469, l5: 0.512223, l6: 0.665050

[epoch:  19/100, batch:   382/  792, ite: 44999] train loss: 5.471545, tar: 0.582234 
l0: 0.660311, l1: 0.671835, l2: 0.668858, l3: 0.677336, l4: 0.760655, l5: 0.900864, l6: 1.092389

[epoch:  19/100, batch:   384/  792, ite: 45000] train loss: 5.472421, tar: 0.582312 
l0: 0.721671, l1: 0.723398, l2: 0.720072, l3: 0.718928, l4: 0.755906, l5: 0.904510, l6: 1.063866

[epoch:  19/100, batch:   386/  792, ite: 45001] train loss: 5.473786, tar: 0.582451 
l0: 0.520598, l1: 0.525761, l2: 0.526044, l3: 0.526224, l4: 0.530376, l5: 0.567130, l6: 0.675489

[epoch:  19/100, batch:   388/  792, ite: 45002] train loss: 5.472911, tar: 0.58

[epoch:  19/100, batch:   468/  792, ite: 45042] train loss: 5.457647, tar: 0.580150 
l0: 0.277288, l1: 0.276150, l2: 0.272926, l3: 0.271165, l4: 0.293523, l5: 0.370059, l6: 0.548504

[epoch:  19/100, batch:   470/  792, ite: 45043] train loss: 5.455239, tar: 0.579859 
l0: 0.436056, l1: 0.446883, l2: 0.444943, l3: 0.445316, l4: 0.477209, l5: 0.597509, l6: 0.727153

[epoch:  19/100, batch:   472/  792, ite: 45044] train loss: 5.454195, tar: 0.579721 
l0: 0.434832, l1: 0.440821, l2: 0.456426, l3: 0.488501, l4: 0.525796, l5: 0.687375, l6: 0.786258

[epoch:  19/100, batch:   474/  792, ite: 45045] train loss: 5.453489, tar: 0.579583 
l0: 0.495462, l1: 0.505311, l2: 0.505059, l3: 0.529722, l4: 0.536813, l5: 0.636528, l6: 1.396711

[epoch:  19/100, batch:   476/  792, ite: 45046] train loss: 5.454105, tar: 0.579502 
l0: 1.296188, l1: 1.306055, l2: 1.296810, l3: 1.284873, l4: 1.325155, l5: 1.512842, l6: 2.021904

[epoch:  19/100, batch:   478/  792, ite: 45047] train loss: 5.460556, tar: 0.58

[epoch:  19/100, batch:   558/  792, ite: 45087] train loss: 5.465640, tar: 0.581148 
l0: 0.477761, l1: 0.482243, l2: 0.481064, l3: 0.485344, l4: 0.487373, l5: 0.549102, l6: 0.605207

[epoch:  19/100, batch:   560/  792, ite: 45088] train loss: 5.464493, tar: 0.581053 
l0: 0.251315, l1: 0.255474, l2: 0.257456, l3: 0.255373, l4: 0.290647, l5: 0.373076, l6: 0.553092

[epoch:  19/100, batch:   562/  792, ite: 45089] train loss: 5.462170, tar: 0.580750 
l0: 0.500598, l1: 0.509071, l2: 0.508831, l3: 0.517506, l4: 0.533003, l5: 0.613629, l6: 0.835238

[epoch:  19/100, batch:   564/  792, ite: 45090] train loss: 5.461732, tar: 0.580677 
l0: 0.431109, l1: 0.436985, l2: 0.440305, l3: 0.438712, l4: 0.467160, l5: 0.563639, l6: 0.723752

[epoch:  19/100, batch:   566/  792, ite: 45091] train loss: 5.460813, tar: 0.580540 
l0: 0.502234, l1: 0.513609, l2: 0.504794, l3: 0.505876, l4: 0.565429, l5: 0.716926, l6: 0.972373

[epoch:  19/100, batch:   568/  792, ite: 45092] train loss: 5.460651, tar: 0.58

[epoch:  19/100, batch:   648/  792, ite: 45132] train loss: 5.457334, tar: 0.580243 
l0: 0.597534, l1: 0.595884, l2: 0.594831, l3: 0.596273, l4: 0.621035, l5: 0.714802, l6: 0.810929

[epoch:  19/100, batch:   650/  792, ite: 45133] train loss: 5.457251, tar: 0.580258 
l0: 0.441896, l1: 0.448008, l2: 0.448078, l3: 0.446259, l4: 0.462933, l5: 0.502080, l6: 0.842313

[epoch:  19/100, batch:   652/  792, ite: 45134] train loss: 5.456362, tar: 0.580136 
l0: 0.436721, l1: 0.447218, l2: 0.443875, l3: 0.455753, l4: 0.472187, l5: 0.599984, l6: 0.691132

[epoch:  19/100, batch:   654/  792, ite: 45135] train loss: 5.455315, tar: 0.580010 
l0: 0.395637, l1: 0.403581, l2: 0.402131, l3: 0.409013, l4: 0.438047, l5: 0.516682, l6: 0.655981

[epoch:  19/100, batch:   656/  792, ite: 45136] train loss: 5.453967, tar: 0.579847 
l0: 0.328459, l1: 0.331120, l2: 0.331262, l3: 0.338782, l4: 0.360846, l5: 0.449629, l6: 0.583014

[epoch:  19/100, batch:   658/  792, ite: 45137] train loss: 5.452139, tar: 0.57

[epoch:  19/100, batch:   738/  792, ite: 45177] train loss: 5.438362, tar: 0.577660 
l0: 0.657536, l1: 0.658213, l2: 0.661056, l3: 0.676332, l4: 0.689143, l5: 0.933449, l6: 1.104668

[epoch:  19/100, batch:   740/  792, ite: 45178] train loss: 5.439235, tar: 0.577727 
l0: 0.342242, l1: 0.345819, l2: 0.346991, l3: 0.368034, l4: 0.426781, l5: 0.513701, l6: 0.730528

[epoch:  19/100, batch:   742/  792, ite: 45179] train loss: 5.437948, tar: 0.577528 
l0: 0.496136, l1: 0.498502, l2: 0.500924, l3: 0.522621, l4: 0.527790, l5: 0.552709, l6: 0.616431

[epoch:  19/100, batch:   744/  792, ite: 45180] train loss: 5.437051, tar: 0.577459 
l0: 0.482867, l1: 0.482528, l2: 0.480584, l3: 0.477273, l4: 0.498740, l5: 0.594009, l6: 0.710880

[epoch:  19/100, batch:   746/  792, ite: 45181] train loss: 5.436225, tar: 0.577379 
l0: 0.499266, l1: 0.500058, l2: 0.499952, l3: 0.508010, l4: 0.530264, l5: 0.594738, l6: 0.626016

[epoch:  19/100, batch:   748/  792, ite: 45182] train loss: 5.435406, tar: 0.57

l0: 0.416543, l1: 0.430124, l2: 0.423915, l3: 0.425266, l4: 0.468523, l5: 0.552945, l6: 0.733632

[epoch:  20/100, batch:    36/  792, ite: 45222] train loss: 5.428116, tar: 0.576303 
l0: 0.737111, l1: 0.740287, l2: 0.741604, l3: 0.741791, l4: 0.771959, l5: 0.879475, l6: 1.052327

[epoch:  20/100, batch:    38/  792, ite: 45223] train loss: 5.429293, tar: 0.576434 
l0: 0.451677, l1: 0.455904, l2: 0.458776, l3: 0.464077, l4: 0.471715, l5: 0.547698, l6: 0.656666

[epoch:  20/100, batch:    40/  792, ite: 45224] train loss: 5.428293, tar: 0.576333 
l0: 0.458373, l1: 0.466436, l2: 0.463696, l3: 0.479501, l4: 0.543900, l5: 0.705125, l6: 0.782048

[epoch:  20/100, batch:    42/  792, ite: 45225] train loss: 5.427888, tar: 0.576236 
l0: 0.446747, l1: 0.444760, l2: 0.450061, l3: 0.453426, l4: 0.482429, l5: 0.589369, l6: 0.802045

[epoch:  20/100, batch:    44/  792, ite: 45226] train loss: 5.427039, tar: 0.576131 
l0: 0.389086, l1: 0.391408, l2: 0.392481, l3: 0.396595, l4: 0.420441, l5: 0.4891

[epoch:  20/100, batch:   124/  792, ite: 45266] train loss: 5.409703, tar: 0.573988 
l0: 0.545751, l1: 0.550378, l2: 0.550220, l3: 0.562469, l4: 0.635293, l5: 0.781462, l6: 1.120713

[epoch:  20/100, batch:   126/  792, ite: 45267] train loss: 5.410041, tar: 0.573965 
l0: 0.447119, l1: 0.446970, l2: 0.449658, l3: 0.459216, l4: 0.475786, l5: 0.662234, l6: 0.918757

[epoch:  20/100, batch:   128/  792, ite: 45268] train loss: 5.409619, tar: 0.573865 
l0: 0.522882, l1: 0.534796, l2: 0.532713, l3: 0.529714, l4: 0.539510, l5: 0.608022, l6: 0.696009

[epoch:  20/100, batch:   130/  792, ite: 45269] train loss: 5.409031, tar: 0.573825 
l0: 0.668349, l1: 0.672162, l2: 0.680380, l3: 0.702808, l4: 0.738008, l5: 0.763071, l6: 1.126888

[epoch:  20/100, batch:   132/  792, ite: 45270] train loss: 5.410072, tar: 0.573900 
l0: 0.620159, l1: 0.623568, l2: 0.619542, l3: 0.619156, l4: 0.640451, l5: 0.619927, l6: 0.718983

[epoch:  20/100, batch:   134/  792, ite: 45271] train loss: 5.409956, tar: 0.57

[epoch:  20/100, batch:   214/  792, ite: 45311] train loss: 5.410374, tar: 0.574038 
l0: 0.780516, l1: 0.799315, l2: 0.797059, l3: 0.810414, l4: 0.830614, l5: 0.977324, l6: 1.565641

[epoch:  20/100, batch:   216/  792, ite: 45312] train loss: 5.412508, tar: 0.574196 
l0: 1.196511, l1: 1.213128, l2: 1.211642, l3: 1.226159, l4: 1.272709, l5: 1.362574, l6: 1.403261

[epoch:  20/100, batch:   218/  792, ite: 45313] train loss: 5.416241, tar: 0.574670 
l0: 1.268359, l1: 1.268254, l2: 1.262298, l3: 1.266658, l4: 1.267655, l5: 1.327709, l6: 1.449045

[epoch:  20/100, batch:   220/  792, ite: 45314] train loss: 5.420205, tar: 0.575198 
l0: 0.426090, l1: 0.431045, l2: 0.433067, l3: 0.431645, l4: 0.487919, l5: 0.745923, l6: 1.034567

[epoch:  20/100, batch:   222/  792, ite: 45315] train loss: 5.419806, tar: 0.575084 
l0: 0.453507, l1: 0.456653, l2: 0.456549, l3: 0.462122, l4: 0.465205, l5: 0.571450, l6: 0.748199

[epoch:  20/100, batch:   224/  792, ite: 45316] train loss: 5.419105, tar: 0.57

[epoch:  20/100, batch:   304/  792, ite: 45356] train loss: 5.408749, tar: 0.573759 
l0: 0.609287, l1: 0.615559, l2: 0.606098, l3: 0.604630, l4: 0.657036, l5: 0.705779, l6: 0.819705

[epoch:  20/100, batch:   306/  792, ite: 45357] train loss: 5.408789, tar: 0.573786 
l0: 0.494625, l1: 0.496373, l2: 0.496757, l3: 0.503736, l4: 0.508274, l5: 0.589024, l6: 0.813319

[epoch:  20/100, batch:   308/  792, ite: 45358] train loss: 5.408336, tar: 0.573727 
l0: 0.525260, l1: 0.532897, l2: 0.534114, l3: 0.534725, l4: 0.550484, l5: 0.623530, l6: 0.838934

[epoch:  20/100, batch:   310/  792, ite: 45359] train loss: 5.408112, tar: 0.573692 
l0: 0.328109, l1: 0.341977, l2: 0.339862, l3: 0.348912, l4: 0.368811, l5: 0.457288, l6: 0.565091

[epoch:  20/100, batch:   312/  792, ite: 45360] train loss: 5.406630, tar: 0.573511 
l0: 0.383947, l1: 0.390885, l2: 0.393126, l3: 0.400209, l4: 0.469737, l5: 0.673443, l6: 0.938270

[epoch:  20/100, batch:   314/  792, ite: 45361] train loss: 5.406046, tar: 0.57

[epoch:  20/100, batch:   394/  792, ite: 45401] train loss: 5.422292, tar: 0.575382 
l0: 0.531335, l1: 0.528689, l2: 0.527634, l3: 0.528406, l4: 0.559983, l5: 0.656541, l6: 0.929172

[epoch:  20/100, batch:   396/  792, ite: 45402] train loss: 5.422209, tar: 0.575351 
l0: 0.584916, l1: 0.586825, l2: 0.588730, l3: 0.598691, l4: 0.637543, l5: 0.710758, l6: 0.873094

[epoch:  20/100, batch:   398/  792, ite: 45403] train loss: 5.422308, tar: 0.575357 
l0: 0.373446, l1: 0.377710, l2: 0.381207, l3: 0.404859, l4: 0.458449, l5: 0.601716, l6: 0.729895

[epoch:  20/100, batch:   400/  792, ite: 45404] train loss: 5.421336, tar: 0.575214 
l0: 0.524405, l1: 0.524059, l2: 0.523582, l3: 0.527988, l4: 0.541250, l5: 0.655612, l6: 0.871479

[epoch:  20/100, batch:   402/  792, ite: 45405] train loss: 5.421187, tar: 0.575178 
l0: 0.761227, l1: 0.762875, l2: 0.760568, l3: 0.753628, l4: 0.772567, l5: 0.930242, l6: 1.164938

[epoch:  20/100, batch:   404/  792, ite: 45406] train loss: 5.422396, tar: 0.57

[epoch:  20/100, batch:   484/  792, ite: 45446] train loss: 5.426698, tar: 0.576042 
l0: 0.307141, l1: 0.310657, l2: 0.308798, l3: 0.319243, l4: 0.339033, l5: 0.469461, l6: 0.586735

[epoch:  20/100, batch:   486/  792, ite: 45447] train loss: 5.425176, tar: 0.575856 
l0: 0.484610, l1: 0.488091, l2: 0.484070, l3: 0.481848, l4: 0.499198, l5: 0.617564, l6: 0.795111

[epoch:  20/100, batch:   488/  792, ite: 45448] train loss: 5.424705, tar: 0.575793 
l0: 0.260227, l1: 0.265266, l2: 0.260790, l3: 0.270602, l4: 0.278630, l5: 0.434178, l6: 0.488286

[epoch:  20/100, batch:   490/  792, ite: 45449] train loss: 5.422864, tar: 0.575575 
l0: 0.779855, l1: 0.786561, l2: 0.785024, l3: 0.781940, l4: 0.772932, l5: 0.772957, l6: 0.882951

[epoch:  20/100, batch:   492/  792, ite: 45450] train loss: 5.423686, tar: 0.575716 
l0: 0.372070, l1: 0.373272, l2: 0.374765, l3: 0.380577, l4: 0.390305, l5: 0.502812, l6: 0.553420

[epoch:  20/100, batch:   494/  792, ite: 45451] train loss: 5.422372, tar: 0.57

[epoch:  20/100, batch:   574/  792, ite: 45491] train loss: 5.415089, tar: 0.574918 
l0: 0.648439, l1: 0.654535, l2: 0.654999, l3: 0.673060, l4: 0.712724, l5: 0.859222, l6: 1.071695

[epoch:  20/100, batch:   576/  792, ite: 45492] train loss: 5.415869, tar: 0.574968 
l0: 0.624989, l1: 0.628568, l2: 0.634192, l3: 0.647205, l4: 0.720861, l5: 0.993110, l6: 1.121643

[epoch:  20/100, batch:   578/  792, ite: 45493] train loss: 5.416540, tar: 0.575001 
l0: 0.742066, l1: 0.757779, l2: 0.750198, l3: 0.746649, l4: 0.737536, l5: 0.803843, l6: 0.981863

[epoch:  20/100, batch:   580/  792, ite: 45494] train loss: 5.417309, tar: 0.575113 
l0: 0.284329, l1: 0.287887, l2: 0.292172, l3: 0.302792, l4: 0.352245, l5: 0.443896, l6: 0.652455

[epoch:  20/100, batch:   582/  792, ite: 45495] train loss: 5.415892, tar: 0.574918 
l0: 0.724092, l1: 0.726686, l2: 0.736399, l3: 0.750333, l4: 0.849881, l5: 0.804724, l6: 0.876191

[epoch:  20/100, batch:   584/  792, ite: 45496] train loss: 5.416594, tar: 0.57

[epoch:  20/100, batch:   664/  792, ite: 45536] train loss: 5.414241, tar: 0.574792 
l0: 0.359555, l1: 0.363162, l2: 0.357857, l3: 0.357717, l4: 0.403540, l5: 0.526917, l6: 0.753650

[epoch:  20/100, batch:   666/  792, ite: 45537] train loss: 5.413291, tar: 0.574652 
l0: 0.564416, l1: 0.569914, l2: 0.572377, l3: 0.590348, l4: 0.625741, l5: 0.721173, l6: 1.081212

[epoch:  20/100, batch:   668/  792, ite: 45538] train loss: 5.413506, tar: 0.574645 
l0: 0.624376, l1: 0.632056, l2: 0.634180, l3: 0.642835, l4: 0.650632, l5: 0.744120, l6: 0.913736

[epoch:  20/100, batch:   670/  792, ite: 45539] train loss: 5.413784, tar: 0.574678 
l0: 0.620459, l1: 0.624448, l2: 0.619962, l3: 0.621615, l4: 0.640601, l5: 0.725112, l6: 0.666840

[epoch:  20/100, batch:   672/  792, ite: 45540] train loss: 5.413723, tar: 0.574707 
l0: 0.380540, l1: 0.380108, l2: 0.376804, l3: 0.375417, l4: 0.395154, l5: 0.477523, l6: 0.601188

[epoch:  20/100, batch:   674/  792, ite: 45541] train loss: 5.412565, tar: 0.57

[epoch:  20/100, batch:   754/  792, ite: 45581] train loss: 5.421770, tar: 0.575650 
l0: 0.558142, l1: 0.562061, l2: 0.567712, l3: 0.577454, l4: 0.613145, l5: 0.746036, l6: 0.967839

[epoch:  20/100, batch:   756/  792, ite: 45582] train loss: 5.421879, tar: 0.575639 
l0: 0.744063, l1: 0.803470, l2: 0.771605, l3: 0.805209, l4: 0.881161, l5: 1.050069, l6: 1.619871

[epoch:  20/100, batch:   758/  792, ite: 45583] train loss: 5.423748, tar: 0.575746 
l0: 0.298096, l1: 0.301820, l2: 0.299620, l3: 0.300747, l4: 0.311961, l5: 0.406152, l6: 0.542814

[epoch:  20/100, batch:   760/  792, ite: 45584] train loss: 5.422264, tar: 0.575570 
l0: 0.487786, l1: 0.496921, l2: 0.499354, l3: 0.503431, l4: 0.541631, l5: 0.684297, l6: 0.862379

[epoch:  20/100, batch:   762/  792, ite: 45585] train loss: 5.421971, tar: 0.575515 
l0: 0.434273, l1: 0.439184, l2: 0.441322, l3: 0.440572, l4: 0.491740, l5: 0.588486, l6: 0.693620

[epoch:  20/100, batch:   764/  792, ite: 45586] train loss: 5.421255, tar: 0.57

l0: 0.952397, l1: 0.951070, l2: 0.951986, l3: 0.953535, l4: 0.970723, l5: 0.994465, l6: 1.017709

[epoch:  21/100, batch:    52/  792, ite: 45626] train loss: 5.414640, tar: 0.574653 
l0: 0.261091, l1: 0.268924, l2: 0.264815, l3: 0.269922, l4: 0.293670, l5: 0.433184, l6: 0.586268

[epoch:  21/100, batch:    54/  792, ite: 45627] train loss: 5.413145, tar: 0.574461 
l0: 0.473826, l1: 0.476178, l2: 0.478568, l3: 0.479919, l4: 0.523553, l5: 0.721690, l6: 0.917112

[epoch:  21/100, batch:    56/  792, ite: 45628] train loss: 5.412952, tar: 0.574399 
l0: 0.407368, l1: 0.411110, l2: 0.411678, l3: 0.430346, l4: 0.479811, l5: 0.585082, l6: 0.666189

[epoch:  21/100, batch:    58/  792, ite: 45629] train loss: 5.412188, tar: 0.574296 
l0: 0.501732, l1: 0.507968, l2: 0.502411, l3: 0.496766, l4: 0.506986, l5: 0.492744, l6: 0.593528

[epoch:  21/100, batch:    60/  792, ite: 45630] train loss: 5.411444, tar: 0.574252 
l0: 0.896041, l1: 0.905200, l2: 0.903904, l3: 0.922233, l4: 0.997599, l5: 1.0622

[epoch:  21/100, batch:   140/  792, ite: 45670] train loss: 5.403106, tar: 0.573328 
l0: 0.318137, l1: 0.316331, l2: 0.315348, l3: 0.318592, l4: 0.344844, l5: 0.475397, l6: 0.538825

[epoch:  21/100, batch:   142/  792, ite: 45671] train loss: 5.401836, tar: 0.573175 
l0: 0.933363, l1: 0.942246, l2: 0.944885, l3: 0.943732, l4: 0.961549, l5: 0.974114, l6: 1.087822

[epoch:  21/100, batch:   144/  792, ite: 45672] train loss: 5.403382, tar: 0.573390 
l0: 0.580360, l1: 0.588020, l2: 0.590452, l3: 0.597819, l4: 0.662960, l5: 0.720433, l6: 0.995180

[epoch:  21/100, batch:   146/  792, ite: 45673] train loss: 5.403559, tar: 0.573395 
l0: 0.462985, l1: 0.468875, l2: 0.466714, l3: 0.467063, l4: 0.493476, l5: 0.569575, l6: 0.722105

[epoch:  21/100, batch:   148/  792, ite: 45674] train loss: 5.403022, tar: 0.573329 
l0: 0.274685, l1: 0.289063, l2: 0.287881, l3: 0.293077, l4: 0.341691, l5: 0.438880, l6: 0.588198

[epoch:  21/100, batch:   150/  792, ite: 45675] train loss: 5.401668, tar: 0.57

[epoch:  21/100, batch:   230/  792, ite: 45715] train loss: 5.400096, tar: 0.572871 
l0: 0.359171, l1: 0.360003, l2: 0.361717, l3: 0.362982, l4: 0.386158, l5: 0.540090, l6: 0.612599

[epoch:  21/100, batch:   232/  792, ite: 45716] train loss: 5.399068, tar: 0.572746 
l0: 0.562381, l1: 0.554963, l2: 0.549972, l3: 0.543769, l4: 0.581755, l5: 0.706995, l6: 0.931274

[epoch:  21/100, batch:   234/  792, ite: 45717] train loss: 5.399098, tar: 0.572740 
l0: 2.440308, l1: 2.419099, l2: 2.411302, l3: 2.330090, l4: 2.343826, l5: 2.380859, l6: 2.779305

[epoch:  21/100, batch:   236/  792, ite: 45718] train loss: 5.407671, tar: 0.573827 
l0: 0.495410, l1: 0.496219, l2: 0.499460, l3: 0.506589, l4: 0.509130, l5: 0.549308, l6: 0.657578

[epoch:  21/100, batch:   238/  792, ite: 45719] train loss: 5.407147, tar: 0.573782 
l0: 0.422967, l1: 0.427020, l2: 0.424079, l3: 0.431527, l4: 0.476685, l5: 0.582870, l6: 0.723439

[epoch:  21/100, batch:   240/  792, ite: 45720] train loss: 5.406473, tar: 0.57

[epoch:  21/100, batch:   320/  792, ite: 45760] train loss: 5.395730, tar: 0.572253 
l0: 0.737518, l1: 0.742367, l2: 0.749916, l3: 0.772408, l4: 0.786560, l5: 0.779875, l6: 0.955923

[epoch:  21/100, batch:   322/  792, ite: 45761] train loss: 5.396424, tar: 0.572347 
l0: 0.744216, l1: 0.747813, l2: 0.756493, l3: 0.756601, l4: 0.784132, l5: 0.777692, l6: 0.948014

[epoch:  21/100, batch:   324/  792, ite: 45762] train loss: 5.397087, tar: 0.572445 
l0: 0.473773, l1: 0.477704, l2: 0.479794, l3: 0.495063, l4: 0.499157, l5: 0.558622, l6: 0.731341

[epoch:  21/100, batch:   326/  792, ite: 45763] train loss: 5.396630, tar: 0.572389 
l0: 0.397021, l1: 0.398169, l2: 0.397188, l3: 0.399888, l4: 0.425857, l5: 0.570351, l6: 0.701424

[epoch:  21/100, batch:   328/  792, ite: 45764] train loss: 5.395839, tar: 0.572289 
l0: 0.480952, l1: 0.483201, l2: 0.484992, l3: 0.488736, l4: 0.541497, l5: 0.605119, l6: 0.761246

[epoch:  21/100, batch:   330/  792, ite: 45765] train loss: 5.395418, tar: 0.57

[epoch:  21/100, batch:   410/  792, ite: 45805] train loss: 5.396549, tar: 0.572175 
l0: 0.542315, l1: 0.546232, l2: 0.544365, l3: 0.549376, l4: 0.623392, l5: 0.716913, l6: 0.827062

[epoch:  21/100, batch:   412/  792, ite: 45806] train loss: 5.396467, tar: 0.572159 
l0: 0.645615, l1: 0.649996, l2: 0.647374, l3: 0.647620, l4: 0.679711, l5: 0.735869, l6: 0.930266

[epoch:  21/100, batch:   414/  792, ite: 45807] train loss: 5.396776, tar: 0.572199 
l0: 0.364862, l1: 0.375114, l2: 0.375717, l3: 0.379416, l4: 0.422941, l5: 0.599060, l6: 0.851742

[epoch:  21/100, batch:   416/  792, ite: 45808] train loss: 5.396097, tar: 0.572085 
l0: 0.281827, l1: 0.284888, l2: 0.276866, l3: 0.270561, l4: 0.305231, l5: 0.365701, l6: 0.473547

[epoch:  21/100, batch:   418/  792, ite: 45809] train loss: 5.394659, tar: 0.571924 
l0: 0.630505, l1: 0.646368, l2: 0.652457, l3: 0.669178, l4: 0.719398, l5: 0.721363, l6: 0.792790

[epoch:  21/100, batch:   420/  792, ite: 45810] train loss: 5.394791, tar: 0.57

[epoch:  21/100, batch:   500/  792, ite: 45850] train loss: 5.395522, tar: 0.572303 
l0: 0.715841, l1: 0.724185, l2: 0.719085, l3: 0.719695, l4: 0.739109, l5: 0.759200, l6: 0.817891

[epoch:  21/100, batch:   502/  792, ite: 45851] train loss: 5.395886, tar: 0.572380 
l0: 0.496731, l1: 0.500951, l2: 0.500922, l3: 0.506490, l4: 0.533194, l5: 0.685959, l6: 0.850472

[epoch:  21/100, batch:   504/  792, ite: 45852] train loss: 5.395752, tar: 0.572339 
l0: 0.629736, l1: 0.637431, l2: 0.637896, l3: 0.638600, l4: 0.648190, l5: 0.787226, l6: 0.941896

[epoch:  21/100, batch:   506/  792, ite: 45853] train loss: 5.395967, tar: 0.572370 
l0: 0.490397, l1: 0.491767, l2: 0.494472, l3: 0.491865, l4: 0.521752, l5: 0.661875, l6: 0.750567

[epoch:  21/100, batch:   508/  792, ite: 45854] train loss: 5.395561, tar: 0.572326 
l0: 0.329547, l1: 0.332395, l2: 0.329396, l3: 0.329175, l4: 0.346065, l5: 0.497812, l6: 0.699015

[epoch:  21/100, batch:   510/  792, ite: 45855] train loss: 5.394594, tar: 0.57

[epoch:  21/100, batch:   590/  792, ite: 45895] train loss: 5.395897, tar: 0.572153 
l0: 0.764173, l1: 0.771088, l2: 0.770313, l3: 0.768352, l4: 0.811025, l5: 0.879066, l6: 0.962087

[epoch:  21/100, batch:   592/  792, ite: 45896] train loss: 5.396575, tar: 0.572255 
l0: 1.076830, l1: 1.075938, l2: 1.073140, l3: 1.075834, l4: 1.144129, l5: 1.212254, l6: 1.512319

[epoch:  21/100, batch:   594/  792, ite: 45897] train loss: 5.398868, tar: 0.572521 
l0: 0.575078, l1: 0.577888, l2: 0.576751, l3: 0.578946, l4: 0.606982, l5: 0.655828, l6: 0.853512

[epoch:  21/100, batch:   596/  792, ite: 45898] train loss: 5.398867, tar: 0.572522 
l0: 0.784964, l1: 0.790702, l2: 0.799525, l3: 0.800492, l4: 0.802660, l5: 0.915604, l6: 1.011550

[epoch:  21/100, batch:   598/  792, ite: 45899] train loss: 5.399672, tar: 0.572634 
l0: 0.520877, l1: 0.522867, l2: 0.524923, l3: 0.521948, l4: 0.540092, l5: 0.594598, l6: 0.709502

[epoch:  21/100, batch:   600/  792, ite: 45900] train loss: 5.399295, tar: 0.57

[epoch:  21/100, batch:   680/  792, ite: 45940] train loss: 5.388674, tar: 0.571460 
l0: 0.592119, l1: 0.598657, l2: 0.598547, l3: 0.593094, l4: 0.583506, l5: 0.606505, l6: 0.817756

[epoch:  21/100, batch:   682/  792, ite: 45941] train loss: 5.388652, tar: 0.571471 
l0: 0.545527, l1: 0.559691, l2: 0.557476, l3: 0.554612, l4: 0.560192, l5: 0.583749, l6: 0.788180

[epoch:  21/100, batch:   684/  792, ite: 45942] train loss: 5.388399, tar: 0.571458 
l0: 0.406420, l1: 0.415078, l2: 0.413166, l3: 0.423519, l4: 0.464536, l5: 0.594809, l6: 0.757847

[epoch:  21/100, batch:   686/  792, ite: 45943] train loss: 5.387796, tar: 0.571373 
l0: 0.633851, l1: 0.644538, l2: 0.640728, l3: 0.642892, l4: 0.697945, l5: 0.775527, l6: 0.856426

[epoch:  21/100, batch:   688/  792, ite: 45944] train loss: 5.388065, tar: 0.571405 
l0: 0.676209, l1: 0.674729, l2: 0.671391, l3: 0.672617, l4: 0.686325, l5: 0.727363, l6: 0.913630

[epoch:  21/100, batch:   690/  792, ite: 45945] train loss: 5.388464, tar: 0.57

[epoch:  21/100, batch:   770/  792, ite: 45985] train loss: 5.398215, tar: 0.572587 
l0: 0.499303, l1: 0.502317, l2: 0.500440, l3: 0.491387, l4: 0.545583, l5: 0.635466, l6: 0.815294

[epoch:  21/100, batch:   772/  792, ite: 45986] train loss: 5.397947, tar: 0.572550 
l0: 0.976756, l1: 0.961930, l2: 0.958265, l3: 0.961381, l4: 1.004331, l5: 1.120090, l6: 1.612574

[epoch:  21/100, batch:   774/  792, ite: 45987] train loss: 5.399863, tar: 0.572754 
l0: 0.814724, l1: 0.817077, l2: 0.795946, l3: 0.804728, l4: 0.849221, l5: 0.841939, l6: 1.023102

[epoch:  21/100, batch:   776/  792, ite: 45988] train loss: 5.400689, tar: 0.572876 
l0: 0.352826, l1: 0.348588, l2: 0.349360, l3: 0.369564, l4: 0.365325, l5: 0.454181, l6: 0.499114

[epoch:  21/100, batch:   778/  792, ite: 45989] train loss: 5.399671, tar: 0.572765 
l0: 1.235899, l1: 1.246543, l2: 1.236389, l3: 1.225346, l4: 1.255843, l5: 1.342187, l6: 1.398579

[epoch:  21/100, batch:   780/  792, ite: 45990] train loss: 5.402260, tar: 0.57

l0: 0.937986, l1: 0.941679, l2: 0.944355, l3: 0.949163, l4: 0.939317, l5: 0.960743, l6: 1.259943

[epoch:  22/100, batch:    68/  792, ite: 46030] train loss: 5.105798, tar: 0.528907 
l0: 0.552441, l1: 0.553607, l2: 0.557479, l3: 0.564541, l4: 0.601677, l5: 0.693070, l6: 0.774993

[epoch:  22/100, batch:    70/  792, ite: 46031] train loss: 5.114269, tar: 0.529666 
l0: 0.494176, l1: 0.496289, l2: 0.496755, l3: 0.500390, l4: 0.509903, l5: 0.606791, l6: 0.723193

[epoch:  22/100, batch:    72/  792, ite: 46032] train loss: 5.098195, tar: 0.528557 
l0: 0.321421, l1: 0.333557, l2: 0.337402, l3: 0.351992, l4: 0.405931, l5: 0.521593, l6: 0.618593

[epoch:  22/100, batch:    74/  792, ite: 46033] train loss: 5.053520, tar: 0.522280 
l0: 0.660408, l1: 0.662909, l2: 0.653856, l3: 0.656085, l4: 0.684650, l5: 0.771347, l6: 0.894605

[epoch:  22/100, batch:    76/  792, ite: 46034] train loss: 5.079758, tar: 0.526343 
l0: 0.905115, l1: 0.914042, l2: 0.906796, l3: 0.924770, l4: 0.944066, l5: 1.1182

[epoch:  22/100, batch:   156/  792, ite: 46074] train loss: 5.201306, tar: 0.543822 
l0: 0.417674, l1: 0.417761, l2: 0.418072, l3: 0.434242, l4: 0.492744, l5: 0.612226, l6: 0.701409

[epoch:  22/100, batch:   158/  792, ite: 46075] train loss: 5.188407, tar: 0.542140 
l0: 0.283456, l1: 0.288847, l2: 0.286214, l3: 0.285815, l4: 0.288408, l5: 0.374057, l6: 0.539472

[epoch:  22/100, batch:   160/  792, ite: 46076] train loss: 5.158349, tar: 0.538737 
l0: 0.593333, l1: 0.598113, l2: 0.601362, l3: 0.601225, l4: 0.641731, l5: 0.738659, l6: 0.906422

[epoch:  22/100, batch:   162/  792, ite: 46077] train loss: 5.164833, tar: 0.539446 
l0: 0.559557, l1: 0.553237, l2: 0.559599, l3: 0.573579, l4: 0.548161, l5: 0.632975, l6: 0.684293

[epoch:  22/100, batch:   164/  792, ite: 46078] train loss: 5.161391, tar: 0.539703 
l0: 0.390536, l1: 0.391446, l2: 0.389670, l3: 0.395953, l4: 0.408136, l5: 0.484930, l6: 0.534129

[epoch:  22/100, batch:   166/  792, ite: 46079] train loss: 5.142009, tar: 0.53

[epoch:  22/100, batch:   246/  792, ite: 46119] train loss: 5.177342, tar: 0.545552 
l0: 0.368317, l1: 0.371332, l2: 0.372373, l3: 0.383990, l4: 0.395934, l5: 0.478613, l6: 0.646819

[epoch:  22/100, batch:   248/  792, ite: 46120] train loss: 5.165296, tar: 0.544075 
l0: 0.795636, l1: 0.798051, l2: 0.797976, l3: 0.808532, l4: 0.815632, l5: 0.856513, l6: 1.001551

[epoch:  22/100, batch:   250/  792, ite: 46121] train loss: 5.179979, tar: 0.546154 
l0: 0.486362, l1: 0.484920, l2: 0.484206, l3: 0.486320, l4: 0.539966, l5: 0.697435, l6: 0.864493

[epoch:  22/100, batch:   252/  792, ite: 46122] train loss: 5.177438, tar: 0.545663 
l0: 0.960539, l1: 0.968761, l2: 0.966662, l3: 0.972234, l4: 1.006198, l5: 1.052193, l6: 1.023432

[epoch:  22/100, batch:   254/  792, ite: 46123] train loss: 5.200296, tar: 0.549036 
l0: 0.880310, l1: 0.883977, l2: 0.880599, l3: 0.883926, l4: 0.883353, l5: 0.965174, l6: 1.178693

[epoch:  22/100, batch:   256/  792, ite: 46124] train loss: 5.221457, tar: 0.55

[epoch:  22/100, batch:   336/  792, ite: 46164] train loss: 5.212198, tar: 0.549388 
l0: 0.483604, l1: 0.491108, l2: 0.490618, l3: 0.484372, l4: 0.499890, l5: 0.519896, l6: 0.593125

[epoch:  22/100, batch:   338/  792, ite: 46165] train loss: 5.205892, tar: 0.548989 
l0: 0.355898, l1: 0.351090, l2: 0.355807, l3: 0.364246, l4: 0.385562, l5: 0.519360, l6: 0.697858

[epoch:  22/100, batch:   340/  792, ite: 46166] train loss: 5.197437, tar: 0.547826 
l0: 0.650916, l1: 0.648977, l2: 0.646834, l3: 0.641481, l4: 0.655536, l5: 0.719327, l6: 0.833044

[epoch:  22/100, batch:   342/  792, ite: 46167] train loss: 5.200554, tar: 0.548443 
l0: 0.581359, l1: 0.589343, l2: 0.589352, l3: 0.587979, l4: 0.615247, l5: 0.679158, l6: 0.926391

[epoch:  22/100, batch:   344/  792, ite: 46168] train loss: 5.201786, tar: 0.548639 
l0: 0.503004, l1: 0.508036, l2: 0.508323, l3: 0.516495, l4: 0.557589, l5: 0.630853, l6: 0.747019

[epoch:  22/100, batch:   346/  792, ite: 46169] train loss: 5.198712, tar: 0.54

[epoch:  22/100, batch:   426/  792, ite: 46209] train loss: 5.272069, tar: 0.556112 
l0: 0.245591, l1: 0.249745, l2: 0.246512, l3: 0.247643, l4: 0.282683, l5: 0.360202, l6: 0.386101

[epoch:  22/100, batch:   428/  792, ite: 46210] train loss: 5.258653, tar: 0.554634 
l0: 0.753260, l1: 0.757490, l2: 0.755753, l3: 0.742237, l4: 0.768156, l5: 0.765616, l6: 0.844189

[epoch:  22/100, batch:   430/  792, ite: 46211] train loss: 5.263351, tar: 0.555575 
l0: 0.704786, l1: 0.708245, l2: 0.708169, l3: 0.716770, l4: 0.743949, l5: 0.818227, l6: 0.990433

[epoch:  22/100, batch:   432/  792, ite: 46212] train loss: 5.269175, tar: 0.556279 
l0: 0.508701, l1: 0.511688, l2: 0.517623, l3: 0.526886, l4: 0.537511, l5: 0.566587, l6: 0.592799

[epoch:  22/100, batch:   434/  792, ite: 46213] train loss: 5.265478, tar: 0.556056 
l0: 0.520157, l1: 0.525170, l2: 0.531222, l3: 0.547835, l4: 0.593519, l5: 0.745822, l6: 0.814920

[epoch:  22/100, batch:   436/  792, ite: 46214] train loss: 5.265416, tar: 0.55

[epoch:  22/100, batch:   516/  792, ite: 46254] train loss: 5.298669, tar: 0.560793 
l0: 0.417367, l1: 0.418043, l2: 0.418045, l3: 0.422891, l4: 0.436201, l5: 0.478236, l6: 0.580611

[epoch:  22/100, batch:   518/  792, ite: 46255] train loss: 5.292647, tar: 0.560231 
l0: 0.317129, l1: 0.319930, l2: 0.319069, l3: 0.324405, l4: 0.364698, l5: 0.442574, l6: 0.564511

[epoch:  22/100, batch:   520/  792, ite: 46256] train loss: 5.284494, tar: 0.559281 
l0: 0.607409, l1: 0.608286, l2: 0.600273, l3: 0.592427, l4: 0.605481, l5: 0.630622, l6: 0.739009

[epoch:  22/100, batch:   522/  792, ite: 46257] train loss: 5.284065, tar: 0.559468 
l0: 0.663412, l1: 0.684266, l2: 0.682616, l3: 0.689041, l4: 0.699776, l5: 0.780256, l6: 0.977042

[epoch:  22/100, batch:   524/  792, ite: 46258] train loss: 5.287612, tar: 0.559871 
l0: 0.472894, l1: 0.473873, l2: 0.470599, l3: 0.475846, l4: 0.496118, l5: 0.537576, l6: 0.662043

[epoch:  22/100, batch:   526/  792, ite: 46259] train loss: 5.283536, tar: 0.55

[epoch:  22/100, batch:   606/  792, ite: 46299] train loss: 5.312286, tar: 0.563140 
l0: 0.250631, l1: 0.248723, l2: 0.252618, l3: 0.263893, l4: 0.306713, l5: 0.435744, l6: 0.530908

[epoch:  22/100, batch:   608/  792, ite: 46300] train loss: 5.304167, tar: 0.562098 
l0: 0.593003, l1: 0.584557, l2: 0.589038, l3: 0.605172, l4: 0.635376, l5: 0.732842, l6: 0.940021

[epoch:  22/100, batch:   610/  792, ite: 46301] train loss: 5.305919, tar: 0.562201 
l0: 0.531924, l1: 0.533228, l2: 0.530803, l3: 0.530617, l4: 0.530049, l5: 0.602004, l6: 0.843006

[epoch:  22/100, batch:   612/  792, ite: 46302] train loss: 5.305459, tar: 0.562101 
l0: 0.422439, l1: 0.424110, l2: 0.422941, l3: 0.431377, l4: 0.430237, l5: 0.455695, l6: 0.598149

[epoch:  22/100, batch:   614/  792, ite: 46303] train loss: 5.300595, tar: 0.561640 
l0: 0.384434, l1: 0.391101, l2: 0.387882, l3: 0.390997, l4: 0.419566, l5: 0.640712, l6: 0.716153

[epoch:  22/100, batch:   616/  792, ite: 46304] train loss: 5.296968, tar: 0.56

[epoch:  22/100, batch:   696/  792, ite: 46344] train loss: 5.319386, tar: 0.564366 
l0: 0.570268, l1: 0.571369, l2: 0.574430, l3: 0.570903, l4: 0.588979, l5: 0.694113, l6: 1.068829

[epoch:  22/100, batch:   698/  792, ite: 46345] train loss: 5.320717, tar: 0.564383 
l0: 0.473708, l1: 0.480767, l2: 0.479625, l3: 0.486482, l4: 0.497424, l5: 0.615903, l6: 0.775431

[epoch:  22/100, batch:   700/  792, ite: 46346] train loss: 5.318725, tar: 0.564121 
l0: 0.671504, l1: 0.689178, l2: 0.678442, l3: 0.700137, l4: 0.712294, l5: 0.880382, l6: 1.055239

[epoch:  22/100, batch:   702/  792, ite: 46347] train loss: 5.322156, tar: 0.564431 
l0: 0.755386, l1: 0.761168, l2: 0.767117, l3: 0.776065, l4: 0.771690, l5: 0.806776, l6: 0.922084

[epoch:  22/100, batch:   704/  792, ite: 46348] train loss: 5.325894, tar: 0.564979 
l0: 0.514351, l1: 0.532539, l2: 0.536384, l3: 0.551996, l4: 0.591946, l5: 0.684445, l6: 0.961610

[epoch:  22/100, batch:   706/  792, ite: 46349] train loss: 5.325974, tar: 0.56

[epoch:  22/100, batch:   786/  792, ite: 46389] train loss: 5.323484, tar: 0.563882 
l0: 1.045104, l1: 1.051938, l2: 1.049869, l3: 1.046884, l4: 1.048672, l5: 1.119062, l6: 1.103870

[epoch:  22/100, batch:   788/  792, ite: 46390] train loss: 5.331781, tar: 0.565116 
l0: 0.644868, l1: 0.649222, l2: 0.648401, l3: 0.660196, l4: 0.705876, l5: 0.774631, l6: 0.866471

[epoch:  22/100, batch:   790/  792, ite: 46391] train loss: 5.333097, tar: 0.565319 
l0: 0.572421, l1: 0.577854, l2: 0.588913, l3: 0.591668, l4: 0.613006, l5: 0.727382, l6: 0.890042

[epoch:  22/100, batch:   792/  792, ite: 46392] train loss: 5.333669, tar: 0.565338 
Starting epoch 23
Epoch 23 loading complete
l0: 0.354611, l1: 0.356076, l2: 0.355672, l3: 0.366976, l4: 0.409440, l5: 0.489486, l6: 0.700451

[epoch:  23/100, batch:     2/  792, ite: 46393] train loss: 5.329573, tar: 0.564801 
l0: 0.334772, l1: 0.340771, l2: 0.340103, l3: 0.334050, l4: 0.347926, l5: 0.380940, l6: 0.525245

[epoch:  23/100, batch:     4/  792,

l0: 0.400149, l1: 0.404699, l2: 0.405450, l3: 0.408056, l4: 0.436006, l5: 0.607718, l6: 0.599140

[epoch:  23/100, batch:    84/  792, ite: 46434] train loss: 5.364865, tar: 0.569192 
l0: 1.395223, l1: 1.404955, l2: 1.389259, l3: 1.402507, l4: 1.391963, l5: 1.451909, l6: 1.419196

[epoch:  23/100, batch:    86/  792, ite: 46435] train loss: 5.378306, tar: 0.571091 
l0: 0.856080, l1: 0.863541, l2: 0.855787, l3: 0.844654, l4: 0.867176, l5: 0.943839, l6: 1.049279

[epoch:  23/100, batch:    88/  792, ite: 46436] train loss: 5.383161, tar: 0.571745 
l0: 0.439333, l1: 0.440489, l2: 0.438546, l3: 0.433202, l4: 0.446163, l5: 0.507893, l6: 0.613953

[epoch:  23/100, batch:    90/  792, ite: 46437] train loss: 5.379953, tar: 0.571442 
l0: 0.746027, l1: 0.757875, l2: 0.757815, l3: 0.756538, l4: 0.808172, l5: 0.930980, l6: 1.091262

[epoch:  23/100, batch:    92/  792, ite: 46438] train loss: 5.383681, tar: 0.571840 
l0: 0.611319, l1: 0.620280, l2: 0.617741, l3: 0.624488, l4: 0.649806, l5: 0.7880

[epoch:  23/100, batch:   172/  792, ite: 46478] train loss: 5.372469, tar: 0.570531 
l0: 0.968246, l1: 0.976477, l2: 0.973254, l3: 0.971692, l4: 0.978339, l5: 1.102516, l6: 1.377745

[epoch:  23/100, batch:   174/  792, ite: 46479] train loss: 5.379808, tar: 0.571361 
l0: 0.421959, l1: 0.428702, l2: 0.428415, l3: 0.426036, l4: 0.457487, l5: 0.584294, l6: 0.749531

[epoch:  23/100, batch:   176/  792, ite: 46480] train loss: 5.377826, tar: 0.571050 
l0: 0.828651, l1: 0.830037, l2: 0.830025, l3: 0.839451, l4: 0.854468, l5: 0.890511, l6: 1.001144

[epoch:  23/100, batch:   178/  792, ite: 46481] train loss: 5.381654, tar: 0.571585 
l0: 0.354809, l1: 0.353750, l2: 0.355559, l3: 0.356903, l4: 0.395135, l5: 0.573164, l6: 0.768310

[epoch:  23/100, batch:   180/  792, ite: 46482] train loss: 5.378622, tar: 0.571135 
l0: 0.613123, l1: 0.622655, l2: 0.623242, l3: 0.628398, l4: 0.645943, l5: 0.654117, l6: 0.743959

[epoch:  23/100, batch:   182/  792, ite: 46483] train loss: 5.378711, tar: 0.57

[epoch:  23/100, batch:   262/  792, ite: 46523] train loss: 5.361402, tar: 0.569316 
l0: 0.745273, l1: 0.743277, l2: 0.743345, l3: 0.743526, l4: 0.742214, l5: 0.755332, l6: 1.132212

[epoch:  23/100, batch:   264/  792, ite: 46524] train loss: 5.363959, tar: 0.569651 
l0: 0.782304, l1: 0.779817, l2: 0.782974, l3: 0.788259, l4: 0.825330, l5: 0.898969, l6: 0.849143

[epoch:  23/100, batch:   266/  792, ite: 46525] train loss: 5.366378, tar: 0.570056 
l0: 1.503114, l1: 1.436785, l2: 1.442471, l3: 1.443530, l4: 1.521857, l5: 1.461096, l6: 2.046863

[epoch:  23/100, batch:   268/  792, ite: 46526] train loss: 5.380933, tar: 0.571830 
l0: 0.365533, l1: 0.377886, l2: 0.376401, l3: 0.381603, l4: 0.424037, l5: 0.507215, l6: 0.704101

[epoch:  23/100, batch:   270/  792, ite: 46527] train loss: 5.378199, tar: 0.571439 
l0: 0.393505, l1: 0.397070, l2: 0.394694, l3: 0.411182, l4: 0.440839, l5: 0.561719, l6: 0.683489

[epoch:  23/100, batch:   272/  792, ite: 46528] train loss: 5.375622, tar: 0.57

[epoch:  23/100, batch:   352/  792, ite: 46568] train loss: 5.362799, tar: 0.569167 
l0: 0.812466, l1: 0.816231, l2: 0.824314, l3: 0.823243, l4: 0.803676, l5: 0.884115, l6: 0.963527

[epoch:  23/100, batch:   354/  792, ite: 46569] train loss: 5.365952, tar: 0.569594 
l0: 0.478419, l1: 0.476380, l2: 0.476645, l3: 0.472809, l4: 0.517847, l5: 0.643365, l6: 0.702460

[epoch:  23/100, batch:   356/  792, ite: 46570] train loss: 5.364536, tar: 0.569434 
l0: 0.732165, l1: 0.745182, l2: 0.744836, l3: 0.748338, l4: 0.757176, l5: 0.971659, l6: 1.101885

[epoch:  23/100, batch:   358/  792, ite: 46571] train loss: 5.367646, tar: 0.569719 
l0: 0.555411, l1: 0.553443, l2: 0.550572, l3: 0.550681, l4: 0.616578, l5: 0.741259, l6: 0.839872

[epoch:  23/100, batch:   360/  792, ite: 46572] train loss: 5.367561, tar: 0.569694 
l0: 0.664037, l1: 0.665846, l2: 0.664628, l3: 0.659957, l4: 0.688632, l5: 0.651337, l6: 0.635467

[epoch:  23/100, batch:   362/  792, ite: 46573] train loss: 5.367467, tar: 0.56

[epoch:  23/100, batch:   442/  792, ite: 46613] train loss: 5.348823, tar: 0.567999 
l0: 0.449971, l1: 0.454764, l2: 0.452832, l3: 0.460971, l4: 0.505827, l5: 0.563920, l6: 0.650592

[epoch:  23/100, batch:   444/  792, ite: 46614] train loss: 5.347100, tar: 0.567806 
l0: 0.491628, l1: 0.501162, l2: 0.501597, l3: 0.491519, l4: 0.509452, l5: 0.586203, l6: 0.713094

[epoch:  23/100, batch:   446/  792, ite: 46615] train loss: 5.345797, tar: 0.567683 
l0: 0.846292, l1: 0.851282, l2: 0.840469, l3: 0.824606, l4: 0.808054, l5: 0.872861, l6: 0.921253

[epoch:  23/100, batch:   448/  792, ite: 46616] train loss: 5.348427, tar: 0.568135 
l0: 0.280148, l1: 0.284682, l2: 0.284212, l3: 0.286141, l4: 0.322060, l5: 0.418679, l6: 0.592898

[epoch:  23/100, batch:   450/  792, ite: 46617] train loss: 5.344941, tar: 0.567668 
l0: 0.371709, l1: 0.370569, l2: 0.372366, l3: 0.382599, l4: 0.419360, l5: 0.493263, l6: 0.674262

[epoch:  23/100, batch:   452/  792, ite: 46618] train loss: 5.342510, tar: 0.56

[epoch:  23/100, batch:   532/  792, ite: 46658] train loss: 5.334209, tar: 0.566093 
l0: 0.535010, l1: 0.534533, l2: 0.533056, l3: 0.532573, l4: 0.558262, l5: 0.622163, l6: 0.832391

[epoch:  23/100, batch:   534/  792, ite: 46659] train loss: 5.333689, tar: 0.566046 
l0: 0.543468, l1: 0.550205, l2: 0.547705, l3: 0.550573, l4: 0.565720, l5: 0.668597, l6: 0.866046

[epoch:  23/100, batch:   536/  792, ite: 46660] train loss: 5.333477, tar: 0.566012 
l0: 0.351432, l1: 0.349004, l2: 0.348232, l3: 0.349843, l4: 0.420630, l5: 0.500360, l6: 0.563185

[epoch:  23/100, batch:   538/  792, ite: 46661] train loss: 5.330723, tar: 0.565687 
l0: 0.764138, l1: 0.775634, l2: 0.776987, l3: 0.791470, l4: 0.846822, l5: 0.910552, l6: 1.078338

[epoch:  23/100, batch:   540/  792, ite: 46662] train loss: 5.333418, tar: 0.565987 
l0: 0.444312, l1: 0.449512, l2: 0.453252, l3: 0.479668, l4: 0.495419, l5: 0.568579, l6: 0.677863

[epoch:  23/100, batch:   542/  792, ite: 46663] train loss: 5.331809, tar: 0.56

[epoch:  23/100, batch:   622/  792, ite: 46703] train loss: 5.326305, tar: 0.565043 
l0: 0.375467, l1: 0.378206, l2: 0.376753, l3: 0.381051, l4: 0.393789, l5: 0.462334, l6: 0.551697

[epoch:  23/100, batch:   624/  792, ite: 46704] train loss: 5.323844, tar: 0.564773 
l0: 0.418908, l1: 0.417787, l2: 0.421709, l3: 0.421838, l4: 0.446746, l5: 0.575560, l6: 0.726936

[epoch:  23/100, batch:   626/  792, ite: 46705] train loss: 5.322299, tar: 0.564567 
l0: 0.855769, l1: 0.845012, l2: 0.854612, l3: 0.896892, l4: 0.899574, l5: 1.039629, l6: 1.094419

[epoch:  23/100, batch:   628/  792, ite: 46706] train loss: 5.325511, tar: 0.564979 
l0: 0.433795, l1: 0.431455, l2: 0.440087, l3: 0.454031, l4: 0.478693, l5: 0.580881, l6: 0.706347

[epoch:  23/100, batch:   630/  792, ite: 46707] train loss: 5.324072, tar: 0.564793 
l0: 0.569053, l1: 0.573378, l2: 0.577994, l3: 0.588102, l4: 0.608122, l5: 0.775183, l6: 1.024865

[epoch:  23/100, batch:   632/  792, ite: 46708] train loss: 5.324907, tar: 0.56

[epoch:  23/100, batch:   712/  792, ite: 46748] train loss: 5.342551, tar: 0.567166 
l0: 0.468249, l1: 0.472037, l2: 0.468010, l3: 0.457695, l4: 0.479062, l5: 0.508173, l6: 0.629263

[epoch:  23/100, batch:   714/  792, ite: 46749] train loss: 5.340997, tar: 0.567034 
l0: 0.442710, l1: 0.446299, l2: 0.445964, l3: 0.448382, l4: 0.492790, l5: 0.639905, l6: 0.813880

[epoch:  23/100, batch:   716/  792, ite: 46750] train loss: 5.340052, tar: 0.566868 
l0: 0.570724, l1: 0.573482, l2: 0.574791, l3: 0.579739, l4: 0.607393, l5: 0.735137, l6: 0.902313

[epoch:  23/100, batch:   718/  792, ite: 46751] train loss: 5.340406, tar: 0.566873 
l0: 0.644925, l1: 0.650737, l2: 0.650282, l3: 0.646090, l4: 0.665669, l5: 0.731855, l6: 0.787903

[epoch:  23/100, batch:   720/  792, ite: 46752] train loss: 5.340730, tar: 0.566977 
l0: 0.773813, l1: 0.776063, l2: 0.774783, l3: 0.775277, l4: 0.782616, l5: 0.833808, l6: 0.989028

[epoch:  23/100, batch:   722/  792, ite: 46753] train loss: 5.342655, tar: 0.56

l0: 0.375845, l1: 0.381370, l2: 0.380528, l3: 0.388396, l4: 0.430815, l5: 0.564840, l6: 0.822982

[epoch:  24/100, batch:    10/  792, ite: 46793] train loss: 5.341117, tar: 0.566765 
l0: 0.249069, l1: 0.253355, l2: 0.258772, l3: 0.269731, l4: 0.309104, l5: 0.457398, l6: 0.650375

[epoch:  24/100, batch:    12/  792, ite: 46794] train loss: 5.338269, tar: 0.566365 
l0: 0.773465, l1: 0.778024, l2: 0.783051, l3: 0.783055, l4: 0.833015, l5: 0.852248, l6: 0.873098

[epoch:  24/100, batch:    14/  792, ite: 46795] train loss: 5.339879, tar: 0.566626 
l0: 0.346281, l1: 0.348374, l2: 0.345115, l3: 0.345475, l4: 0.350901, l5: 0.442699, l6: 0.664357

[epoch:  24/100, batch:    16/  792, ite: 46796] train loss: 5.337592, tar: 0.566349 
l0: 0.560021, l1: 0.556780, l2: 0.559128, l3: 0.565851, l4: 0.580682, l5: 0.637625, l6: 0.725008

[epoch:  24/100, batch:    18/  792, ite: 46797] train loss: 5.337061, tar: 0.566341 
l0: 0.487952, l1: 0.485705, l2: 0.487417, l3: 0.479168, l4: 0.497331, l5: 0.6202

[epoch:  24/100, batch:    98/  792, ite: 46837] train loss: 5.334297, tar: 0.566078 
l0: 0.470275, l1: 0.471304, l2: 0.469108, l3: 0.474183, l4: 0.490653, l5: 0.611560, l6: 0.776030

[epoch:  24/100, batch:   100/  792, ite: 46838] train loss: 5.333422, tar: 0.565964 
l0: 2.426770, l1: 2.417912, l2: 2.425608, l3: 2.408788, l4: 2.528777, l5: 2.463720, l6: 2.637439

[epoch:  24/100, batch:   102/  792, ite: 46839] train loss: 5.350735, tar: 0.568181 
l0: 0.361137, l1: 0.366930, l2: 0.367654, l3: 0.364802, l4: 0.390927, l5: 0.498130, l6: 0.685634

[epoch:  24/100, batch:   104/  792, ite: 46840] train loss: 5.348774, tar: 0.567935 
l0: 0.546657, l1: 0.560225, l2: 0.545475, l3: 0.541226, l4: 0.569424, l5: 0.656139, l6: 0.791691

[epoch:  24/100, batch:   106/  792, ite: 46841] train loss: 5.348459, tar: 0.567910 
l0: 0.540502, l1: 0.541927, l2: 0.543687, l3: 0.552273, l4: 0.582957, l5: 0.689821, l6: 0.790079

[epoch:  24/100, batch:   108/  792, ite: 46842] train loss: 5.348215, tar: 0.56

[epoch:  24/100, batch:   188/  792, ite: 46882] train loss: 5.365115, tar: 0.570193 
l0: 0.674773, l1: 0.694329, l2: 0.687100, l3: 0.710057, l4: 0.712272, l5: 0.846245, l6: 1.411995

[epoch:  24/100, batch:   190/  792, ite: 46883] train loss: 5.367475, tar: 0.570312 
l0: 0.776470, l1: 0.785824, l2: 0.791472, l3: 0.786454, l4: 0.795528, l5: 0.915265, l6: 1.108247

[epoch:  24/100, batch:   192/  792, ite: 46884] train loss: 5.369666, tar: 0.570545 
l0: 0.666617, l1: 0.662598, l2: 0.668973, l3: 0.672909, l4: 0.689525, l5: 0.851281, l6: 1.139417

[epoch:  24/100, batch:   194/  792, ite: 46885] train loss: 5.371036, tar: 0.570654 
l0: 0.877974, l1: 0.881194, l2: 0.879394, l3: 0.888128, l4: 0.907585, l5: 1.092923, l6: 1.476532

[epoch:  24/100, batch:   196/  792, ite: 46886] train loss: 5.374202, tar: 0.571000 
l0: 0.480745, l1: 0.482304, l2: 0.483660, l3: 0.486700, l4: 0.484541, l5: 0.523733, l6: 0.704563

[epoch:  24/100, batch:   198/  792, ite: 46887] train loss: 5.373139, tar: 0.57

[epoch:  24/100, batch:   278/  792, ite: 46927] train loss: 5.347030, tar: 0.567277 
l0: 0.803516, l1: 0.808067, l2: 0.805847, l3: 0.809977, l4: 0.814732, l5: 0.919870, l6: 1.045267

[epoch:  24/100, batch:   280/  792, ite: 46928] train loss: 5.348923, tar: 0.567531 
l0: 0.636751, l1: 0.637851, l2: 0.640092, l3: 0.638437, l4: 0.664483, l5: 0.710201, l6: 0.870419

[epoch:  24/100, batch:   282/  792, ite: 46929] train loss: 5.349340, tar: 0.567606 
l0: 0.413132, l1: 0.415672, l2: 0.416871, l3: 0.412122, l4: 0.433598, l5: 0.497083, l6: 0.582606

[epoch:  24/100, batch:   284/  792, ite: 46930] train loss: 5.347677, tar: 0.567439 
l0: 0.269199, l1: 0.267260, l2: 0.266981, l3: 0.269170, l4: 0.282711, l5: 0.382331, l6: 0.516322

[epoch:  24/100, batch:   286/  792, ite: 46931] train loss: 5.344957, tar: 0.567119 
l0: 0.316987, l1: 0.318442, l2: 0.321457, l3: 0.337811, l4: 0.350826, l5: 0.396641, l6: 0.580051

[epoch:  24/100, batch:   288/  792, ite: 46932] train loss: 5.342746, tar: 0.56

[epoch:  24/100, batch:   368/  792, ite: 46972] train loss: 5.334425, tar: 0.566171 
l0: 0.709790, l1: 0.722802, l2: 0.713743, l3: 0.716473, l4: 0.725486, l5: 0.841037, l6: 1.038805

[epoch:  24/100, batch:   370/  792, ite: 46973] train loss: 5.335670, tar: 0.566318 
l0: 0.669973, l1: 0.670001, l2: 0.669684, l3: 0.670224, l4: 0.682022, l5: 0.787465, l6: 0.950570

[epoch:  24/100, batch:   372/  792, ite: 46974] train loss: 5.336443, tar: 0.566425 
l0: 0.404623, l1: 0.408399, l2: 0.408733, l3: 0.408307, l4: 0.437023, l5: 0.532837, l6: 0.740759

[epoch:  24/100, batch:   374/  792, ite: 46975] train loss: 5.335249, tar: 0.566259 
l0: 0.772711, l1: 0.775898, l2: 0.773498, l3: 0.774148, l4: 0.813669, l5: 0.858712, l6: 1.029457

[epoch:  24/100, batch:   376/  792, ite: 46976] train loss: 5.336926, tar: 0.566470 
l0: 0.402851, l1: 0.403890, l2: 0.402103, l3: 0.406074, l4: 0.418409, l5: 0.501550, l6: 0.841264

[epoch:  24/100, batch:   378/  792, ite: 46977] train loss: 5.335770, tar: 0.56

[epoch:  24/100, batch:   458/  792, ite: 47017] train loss: 5.334126, tar: 0.566275 
l0: 0.897389, l1: 0.903268, l2: 0.900202, l3: 0.892959, l4: 0.923482, l5: 1.051232, l6: 1.166794

[epoch:  24/100, batch:   460/  792, ite: 47018] train loss: 5.336707, tar: 0.566600 
l0: 0.562100, l1: 0.572892, l2: 0.573517, l3: 0.572933, l4: 0.597677, l5: 0.664255, l6: 0.730642

[epoch:  24/100, batch:   462/  792, ite: 47019] train loss: 5.336335, tar: 0.566596 
l0: 0.644126, l1: 0.648371, l2: 0.648211, l3: 0.641901, l4: 0.680820, l5: 0.900167, l6: 1.048970

[epoch:  24/100, batch:   464/  792, ite: 47020] train loss: 5.337268, tar: 0.566672 
l0: 1.259574, l1: 1.279065, l2: 1.305173, l3: 1.335622, l4: 1.358198, l5: 1.318411, l6: 1.346877

[epoch:  24/100, batch:   466/  792, ite: 47021] train loss: 5.342400, tar: 0.567351 
l0: 0.817979, l1: 0.824380, l2: 0.820490, l3: 0.811552, l4: 0.816555, l5: 0.848656, l6: 0.907152

[epoch:  24/100, batch:   468/  792, ite: 47022] train loss: 5.343921, tar: 0.56

[epoch:  24/100, batch:   548/  792, ite: 47062] train loss: 5.339075, tar: 0.566446 
l0: 0.790351, l1: 0.788730, l2: 0.788664, l3: 0.790210, l4: 0.785303, l5: 0.890864, l6: 1.166260

[epoch:  24/100, batch:   550/  792, ite: 47063] train loss: 5.340871, tar: 0.566656 
l0: 0.331019, l1: 0.334006, l2: 0.334222, l3: 0.339623, l4: 0.354125, l5: 0.541099, l6: 0.774978

[epoch:  24/100, batch:   552/  792, ite: 47064] train loss: 5.339489, tar: 0.566435 
l0: 0.478175, l1: 0.479413, l2: 0.475945, l3: 0.476134, l4: 0.482866, l5: 0.668122, l6: 0.953529

[epoch:  24/100, batch:   554/  792, ite: 47065] train loss: 5.339141, tar: 0.566352 
l0: 0.320332, l1: 0.321380, l2: 0.322528, l3: 0.341191, l4: 0.342649, l5: 0.416645, l6: 0.541933

[epoch:  24/100, batch:   556/  792, ite: 47066] train loss: 5.337163, tar: 0.566121 
l0: 0.221481, l1: 0.220142, l2: 0.221096, l3: 0.231731, l4: 0.237457, l5: 0.360340, l6: 0.802631

[epoch:  24/100, batch:   558/  792, ite: 47067] train loss: 5.334939, tar: 0.56

[epoch:  24/100, batch:   638/  792, ite: 47107] train loss: 5.332723, tar: 0.565556 
l0: 0.384417, l1: 0.386067, l2: 0.387991, l3: 0.391711, l4: 0.422962, l5: 0.503812, l6: 0.654775

[epoch:  24/100, batch:   640/  792, ite: 47108] train loss: 5.331383, tar: 0.565392 
l0: 0.358133, l1: 0.360189, l2: 0.361729, l3: 0.355818, l4: 0.377942, l5: 0.509350, l6: 0.710654

[epoch:  24/100, batch:   642/  792, ite: 47109] train loss: 5.330013, tar: 0.565205 
l0: 0.913689, l1: 0.915625, l2: 0.910246, l3: 0.914043, l4: 0.964027, l5: 1.038785, l6: 1.330660

[epoch:  24/100, batch:   644/  792, ite: 47110] train loss: 5.332714, tar: 0.565519 
l0: 0.454409, l1: 0.454697, l2: 0.453149, l3: 0.459923, l4: 0.480083, l5: 0.496843, l6: 0.666712

[epoch:  24/100, batch:   646/  792, ite: 47111] train loss: 5.331677, tar: 0.565419 
l0: 0.660018, l1: 0.664398, l2: 0.663642, l3: 0.670189, l4: 0.662673, l5: 0.723459, l6: 0.878427

[epoch:  24/100, batch:   648/  792, ite: 47112] train loss: 5.332204, tar: 0.56

[epoch:  24/100, batch:   728/  792, ite: 47152] train loss: 5.330848, tar: 0.565172 
l0: 0.543350, l1: 0.544996, l2: 0.547262, l3: 0.546804, l4: 0.566471, l5: 0.622437, l6: 0.763986

[epoch:  24/100, batch:   730/  792, ite: 47153] train loss: 5.330630, tar: 0.565153 
l0: 0.600948, l1: 0.609297, l2: 0.611350, l3: 0.616503, l4: 0.638633, l5: 0.723978, l6: 0.896867

[epoch:  24/100, batch:   732/  792, ite: 47154] train loss: 5.330970, tar: 0.565184 
l0: 0.815191, l1: 0.828906, l2: 0.822857, l3: 0.838265, l4: 0.887789, l5: 0.926713, l6: 1.081748

[epoch:  24/100, batch:   734/  792, ite: 47155] train loss: 5.332560, tar: 0.565400 
l0: 0.714051, l1: 0.729330, l2: 0.728611, l3: 0.741981, l4: 0.761743, l5: 0.836126, l6: 0.913522

[epoch:  24/100, batch:   736/  792, ite: 47156] train loss: 5.333550, tar: 0.565529 
l0: 0.523130, l1: 0.516553, l2: 0.514902, l3: 0.520197, l4: 0.559774, l5: 0.661686, l6: 0.812092

[epoch:  24/100, batch:   738/  792, ite: 47157] train loss: 5.333518, tar: 0.56

l0: 0.553080, l1: 0.568062, l2: 0.565144, l3: 0.568276, l4: 0.562390, l5: 0.615856, l6: 0.851884

[epoch:  25/100, batch:    26/  792, ite: 47197] train loss: 5.316802, tar: 0.563571 
l0: 0.513801, l1: 0.513558, l2: 0.516161, l3: 0.524289, l4: 0.550205, l5: 0.581779, l6: 0.739923

[epoch:  25/100, batch:    28/  792, ite: 47198] train loss: 5.316349, tar: 0.563529 
l0: 0.500119, l1: 0.503172, l2: 0.502613, l3: 0.503155, l4: 0.527439, l5: 0.641857, l6: 0.809405

[epoch:  25/100, batch:    30/  792, ite: 47199] train loss: 5.315966, tar: 0.563477 
l0: 1.105479, l1: 1.112208, l2: 1.108563, l3: 1.098834, l4: 1.133972, l5: 1.219753, l6: 1.279158

[epoch:  25/100, batch:    32/  792, ite: 47200] train loss: 5.319324, tar: 0.563928 
l0: 0.561645, l1: 0.567757, l2: 0.571041, l3: 0.589904, l4: 0.616001, l5: 0.663977, l6: 0.745854

[epoch:  25/100, batch:    34/  792, ite: 47201] train loss: 5.319174, tar: 0.563926 
l0: 0.401825, l1: 0.396920, l2: 0.384802, l3: 0.379761, l4: 0.386186, l5: 0.4648

[epoch:  25/100, batch:   114/  792, ite: 47241] train loss: 5.324441, tar: 0.564695 
l0: 0.522368, l1: 0.524166, l2: 0.525129, l3: 0.527098, l4: 0.535537, l5: 0.640043, l6: 0.749099

[epoch:  25/100, batch:   116/  792, ite: 47242] train loss: 5.324017, tar: 0.564661 
l0: 0.372789, l1: 0.377495, l2: 0.379259, l3: 0.377340, l4: 0.415213, l5: 0.486476, l6: 0.656025

[epoch:  25/100, batch:   118/  792, ite: 47243] train loss: 5.322881, tar: 0.564507 
l0: 0.549923, l1: 0.550063, l2: 0.549425, l3: 0.540791, l4: 0.546739, l5: 0.680537, l6: 0.678917

[epoch:  25/100, batch:   120/  792, ite: 47244] train loss: 5.322478, tar: 0.564495 
l0: 0.516293, l1: 0.517965, l2: 0.519527, l3: 0.530797, l4: 0.566653, l5: 0.671656, l6: 0.972583

[epoch:  25/100, batch:   122/  792, ite: 47245] train loss: 5.322583, tar: 0.564456 
l0: 0.409857, l1: 0.415842, l2: 0.418609, l3: 0.430110, l4: 0.482808, l5: 0.565670, l6: 0.695330

[epoch:  25/100, batch:   124/  792, ite: 47246] train loss: 5.321646, tar: 0.56

[epoch:  25/100, batch:   204/  792, ite: 47286] train loss: 5.317259, tar: 0.563770 
l0: 0.486241, l1: 0.491208, l2: 0.488708, l3: 0.491857, l4: 0.530525, l5: 0.569776, l6: 0.779040

[epoch:  25/100, batch:   206/  792, ite: 47287] train loss: 5.316619, tar: 0.563710 
l0: 0.432050, l1: 0.428801, l2: 0.429251, l3: 0.427089, l4: 0.452279, l5: 0.537266, l6: 0.757227

[epoch:  25/100, batch:   208/  792, ite: 47288] train loss: 5.315753, tar: 0.563608 
l0: 0.452149, l1: 0.462619, l2: 0.463186, l3: 0.461545, l4: 0.504722, l5: 0.676048, l6: 0.747315

[epoch:  25/100, batch:   210/  792, ite: 47289] train loss: 5.315179, tar: 0.563522 
l0: 0.362783, l1: 0.364358, l2: 0.365117, l3: 0.370808, l4: 0.411228, l5: 0.548877, l6: 0.732779

[epoch:  25/100, batch:   212/  792, ite: 47290] train loss: 5.314215, tar: 0.563366 
l0: 0.215051, l1: 0.217167, l2: 0.221434, l3: 0.231685, l4: 0.255429, l5: 0.370927, l6: 0.487267

[epoch:  25/100, batch:   214/  792, ite: 47291] train loss: 5.312104, tar: 0.56

[epoch:  25/100, batch:   294/  792, ite: 47331] train loss: 5.322180, tar: 0.564596 
l0: 0.368523, l1: 0.368222, l2: 0.369976, l3: 0.378740, l4: 0.422097, l5: 0.528992, l6: 0.638067

[epoch:  25/100, batch:   296/  792, ite: 47332] train loss: 5.321036, tar: 0.564449 
l0: 0.448183, l1: 0.454888, l2: 0.460058, l3: 0.457303, l4: 0.489366, l5: 0.634778, l6: 0.837325

[epoch:  25/100, batch:   298/  792, ite: 47333] train loss: 5.320460, tar: 0.564361 
l0: 0.467373, l1: 0.469435, l2: 0.472330, l3: 0.475343, l4: 0.511144, l5: 0.594865, l6: 0.715963

[epoch:  25/100, batch:   300/  792, ite: 47334] train loss: 5.319795, tar: 0.564289 
l0: 0.380670, l1: 0.384200, l2: 0.383320, l3: 0.377423, l4: 0.395019, l5: 0.529115, l6: 0.750306

[epoch:  25/100, batch:   302/  792, ite: 47335] train loss: 5.318917, tar: 0.564151 
l0: 0.446431, l1: 0.452801, l2: 0.455169, l3: 0.458502, l4: 0.485772, l5: 0.679200, l6: 1.042015

[epoch:  25/100, batch:   304/  792, ite: 47336] train loss: 5.318896, tar: 0.56

[epoch:  25/100, batch:   384/  792, ite: 47376] train loss: 5.321077, tar: 0.564450 
l0: 0.974125, l1: 0.973789, l2: 0.976818, l3: 0.988862, l4: 1.027258, l5: 1.030768, l6: 1.080378

[epoch:  25/100, batch:   386/  792, ite: 47377] train loss: 5.323168, tar: 0.564747 
l0: 0.380510, l1: 0.380483, l2: 0.379296, l3: 0.388035, l4: 0.426992, l5: 0.589029, l6: 0.719670

[epoch:  25/100, batch:   388/  792, ite: 47378] train loss: 5.322210, tar: 0.564614 
l0: 0.377317, l1: 0.376943, l2: 0.377529, l3: 0.388684, l4: 0.379074, l5: 0.439405, l6: 0.538809

[epoch:  25/100, batch:   390/  792, ite: 47379] train loss: 5.320891, tar: 0.564478 
l0: 0.953554, l1: 0.931825, l2: 0.948444, l3: 0.958647, l4: 0.983863, l5: 0.861725, l6: 0.945891

[epoch:  25/100, batch:   392/  792, ite: 47380] train loss: 5.322458, tar: 0.564760 
l0: 0.666681, l1: 0.668326, l2: 0.666801, l3: 0.687156, l4: 0.740001, l5: 0.852950, l6: 1.643818

[epoch:  25/100, batch:   394/  792, ite: 47381] train loss: 5.324204, tar: 0.56

[epoch:  25/100, batch:   474/  792, ite: 47421] train loss: 5.315154, tar: 0.563777 
l0: 0.992418, l1: 1.001745, l2: 0.995198, l3: 1.000587, l4: 1.015634, l5: 1.124229, l6: 1.300840

[epoch:  25/100, batch:   476/  792, ite: 47422] train loss: 5.317736, tar: 0.564078 
l0: 0.560845, l1: 0.565602, l2: 0.566102, l3: 0.571160, l4: 0.580850, l5: 0.631914, l6: 0.748816

[epoch:  25/100, batch:   478/  792, ite: 47423] train loss: 5.317573, tar: 0.564076 
l0: 0.309705, l1: 0.310214, l2: 0.310815, l3: 0.316928, l4: 0.343995, l5: 0.427131, l6: 0.494028

[epoch:  25/100, batch:   480/  792, ite: 47424] train loss: 5.315927, tar: 0.563897 
l0: 0.148320, l1: 0.156448, l2: 0.159108, l3: 0.170793, l4: 0.202344, l5: 0.292772, l6: 0.418178

[epoch:  25/100, batch:   482/  792, ite: 47425] train loss: 5.313597, tar: 0.563605 
l0: 0.663237, l1: 0.672539, l2: 0.673780, l3: 0.676631, l4: 0.721173, l5: 0.815090, l6: 0.916515

[epoch:  25/100, batch:   484/  792, ite: 47426] train loss: 5.314231, tar: 0.56

[epoch:  25/100, batch:   564/  792, ite: 47466] train loss: 5.313003, tar: 0.563365 
l0: 0.333601, l1: 0.335971, l2: 0.333387, l3: 0.335805, l4: 0.362875, l5: 0.426610, l6: 0.479673

[epoch:  25/100, batch:   566/  792, ite: 47467] train loss: 5.311526, tar: 0.563209 
l0: 0.352610, l1: 0.357128, l2: 0.358832, l3: 0.358745, l4: 0.394307, l5: 0.458517, l6: 0.667031

[epoch:  25/100, batch:   568/  792, ite: 47468] train loss: 5.310434, tar: 0.563065 
l0: 0.313726, l1: 0.312773, l2: 0.311447, l3: 0.322834, l4: 0.348282, l5: 0.479830, l6: 0.659021

[epoch:  25/100, batch:   570/  792, ite: 47469] train loss: 5.309163, tar: 0.562896 
l0: 0.445744, l1: 0.447014, l2: 0.447083, l3: 0.462722, l4: 0.499299, l5: 0.663386, l6: 0.866388

[epoch:  25/100, batch:   572/  792, ite: 47470] train loss: 5.308768, tar: 0.562816 
l0: 0.428051, l1: 0.434769, l2: 0.425583, l3: 0.409944, l4: 0.409543, l5: 0.520803, l6: 0.660719

[epoch:  25/100, batch:   574/  792, ite: 47471] train loss: 5.307845, tar: 0.56

[epoch:  25/100, batch:   654/  792, ite: 47511] train loss: 5.298331, tar: 0.561532 
l0: 0.603518, l1: 0.606485, l2: 0.604571, l3: 0.598637, l4: 0.607747, l5: 0.623222, l6: 0.746120

[epoch:  25/100, batch:   656/  792, ite: 47512] train loss: 5.298287, tar: 0.561560 
l0: 0.445674, l1: 0.448807, l2: 0.445548, l3: 0.454163, l4: 0.458415, l5: 0.572138, l6: 0.684523

[epoch:  25/100, batch:   658/  792, ite: 47513] train loss: 5.297650, tar: 0.561484 
l0: 0.835648, l1: 0.855876, l2: 0.853764, l3: 0.841828, l4: 0.892450, l5: 0.973545, l6: 1.129253

[epoch:  25/100, batch:   660/  792, ite: 47514] train loss: 5.299268, tar: 0.561665 
l0: 0.478187, l1: 0.477933, l2: 0.477537, l3: 0.474913, l4: 0.485133, l5: 0.554453, l6: 0.639467

[epoch:  25/100, batch:   662/  792, ite: 47515] train loss: 5.298567, tar: 0.561610 
l0: 0.400345, l1: 0.400903, l2: 0.397402, l3: 0.396722, l4: 0.435540, l5: 0.545730, l6: 0.717543

[epoch:  25/100, batch:   664/  792, ite: 47516] train loss: 5.297761, tar: 0.56

[epoch:  25/100, batch:   744/  792, ite: 47556] train loss: 5.305893, tar: 0.562481 
l0: 0.462871, l1: 0.461084, l2: 0.463099, l3: 0.470709, l4: 0.470901, l5: 0.559679, l6: 0.736508

[epoch:  25/100, batch:   746/  792, ite: 47557] train loss: 5.305384, tar: 0.562417 
l0: 0.458518, l1: 0.461008, l2: 0.462906, l3: 0.480766, l4: 0.538405, l5: 0.637497, l6: 0.759598

[epoch:  25/100, batch:   748/  792, ite: 47558] train loss: 5.304951, tar: 0.562351 
l0: 0.462070, l1: 0.466841, l2: 0.465247, l3: 0.473832, l4: 0.522343, l5: 0.598264, l6: 0.767370

[epoch:  25/100, batch:   750/  792, ite: 47559] train loss: 5.304565, tar: 0.562286 
l0: 1.002014, l1: 0.994455, l2: 0.978630, l3: 0.975041, l4: 1.014554, l5: 1.135070, l6: 1.065710

[epoch:  25/100, batch:   752/  792, ite: 47560] train loss: 5.306531, tar: 0.562568 
l0: 0.700878, l1: 0.705526, l2: 0.704926, l3: 0.710598, l4: 0.710958, l5: 0.757903, l6: 0.861776

[epoch:  25/100, batch:   754/  792, ite: 47561] train loss: 5.307026, tar: 0.56

l0: 0.421233, l1: 0.422125, l2: 0.421497, l3: 0.433109, l4: 0.473846, l5: 0.558306, l6: 0.768224

[epoch:  26/100, batch:    42/  792, ite: 47601] train loss: 5.317477, tar: 0.563737 
l0: 0.671008, l1: 0.678566, l2: 0.673482, l3: 0.672590, l4: 0.676877, l5: 0.751550, l6: 0.923082

[epoch:  26/100, batch:    44/  792, ite: 47602] train loss: 5.317873, tar: 0.563803 
l0: 0.546641, l1: 0.549864, l2: 0.552812, l3: 0.556666, l4: 0.590054, l5: 0.710338, l6: 0.797336

[epoch:  26/100, batch:    46/  792, ite: 47603] train loss: 5.317815, tar: 0.563793 
l0: 0.292394, l1: 0.301774, l2: 0.297469, l3: 0.304266, l4: 0.318849, l5: 0.405963, l6: 0.445460

[epoch:  26/100, batch:    48/  792, ite: 47604] train loss: 5.316305, tar: 0.563624 
l0: 0.350754, l1: 0.353173, l2: 0.358213, l3: 0.395919, l4: 0.402230, l5: 0.532155, l6: 0.647129

[epoch:  26/100, batch:    50/  792, ite: 47605] train loss: 5.315358, tar: 0.563491 
l0: 0.546538, l1: 0.555640, l2: 0.561430, l3: 0.583872, l4: 0.646604, l5: 0.7860

[epoch:  26/100, batch:   130/  792, ite: 47645] train loss: 5.307930, tar: 0.562256 
l0: 0.382826, l1: 0.386322, l2: 0.386230, l3: 0.393955, l4: 0.425077, l5: 0.544165, l6: 0.679968

[epoch:  26/100, batch:   132/  792, ite: 47646] train loss: 5.307098, tar: 0.562147 
l0: 0.714663, l1: 0.714559, l2: 0.712364, l3: 0.716838, l4: 0.735511, l5: 0.839316, l6: 1.183321

[epoch:  26/100, batch:   134/  792, ite: 47647] train loss: 5.308184, tar: 0.562239 
l0: 0.839317, l1: 0.852894, l2: 0.852280, l3: 0.849227, l4: 0.884874, l5: 0.971706, l6: 1.049088

[epoch:  26/100, batch:   136/  792, ite: 47648] train loss: 5.309461, tar: 0.562408 
l0: 1.077474, l1: 1.081018, l2: 1.085391, l3: 1.091004, l4: 1.112833, l5: 1.170910, l6: 1.222089

[epoch:  26/100, batch:   138/  792, ite: 47649] train loss: 5.311755, tar: 0.562720 
l0: 0.530826, l1: 0.535159, l2: 0.540367, l3: 0.553953, l4: 0.562804, l5: 0.731531, l6: 0.972515

[epoch:  26/100, batch:   140/  792, ite: 47650] train loss: 5.311830, tar: 0.56

[epoch:  26/100, batch:   220/  792, ite: 47690] train loss: 5.308236, tar: 0.562268 
l0: 0.362912, l1: 0.369038, l2: 0.368853, l3: 0.380526, l4: 0.421737, l5: 0.544346, l6: 0.676922

[epoch:  26/100, batch:   222/  792, ite: 47691] train loss: 5.307349, tar: 0.562151 
l0: 0.649065, l1: 0.653757, l2: 0.647000, l3: 0.671623, l4: 0.725623, l5: 0.857593, l6: 1.150151

[epoch:  26/100, batch:   224/  792, ite: 47692] train loss: 5.308058, tar: 0.562202 
l0: 0.726028, l1: 0.732264, l2: 0.730439, l3: 0.726555, l4: 0.744208, l5: 0.824694, l6: 1.154372

[epoch:  26/100, batch:   226/  792, ite: 47693] train loss: 5.308988, tar: 0.562299 
l0: 0.273608, l1: 0.275570, l2: 0.284073, l3: 0.294768, l4: 0.320394, l5: 0.427604, l6: 0.481566

[epoch:  26/100, batch:   228/  792, ite: 47694] train loss: 5.307570, tar: 0.562128 
l0: 0.419126, l1: 0.418578, l2: 0.420211, l3: 0.426362, l4: 0.439417, l5: 0.543353, l6: 0.732225

[epoch:  26/100, batch:   230/  792, ite: 47695] train loss: 5.306972, tar: 0.56

[epoch:  26/100, batch:   310/  792, ite: 47735] train loss: 5.291152, tar: 0.560350 
l0: 0.678610, l1: 0.679615, l2: 0.679597, l3: 0.681848, l4: 0.711279, l5: 0.817799, l6: 0.872827

[epoch:  26/100, batch:   312/  792, ite: 47736] train loss: 5.291627, tar: 0.560418 
l0: 0.567818, l1: 0.570266, l2: 0.570392, l3: 0.579581, l4: 0.618102, l5: 0.742351, l6: 0.880501

[epoch:  26/100, batch:   314/  792, ite: 47737] train loss: 5.291728, tar: 0.560423 
l0: 0.389683, l1: 0.391876, l2: 0.387971, l3: 0.393777, l4: 0.438293, l5: 0.546098, l6: 1.080422

[epoch:  26/100, batch:   316/  792, ite: 47738] train loss: 5.291409, tar: 0.560324 
l0: 0.755697, l1: 0.767997, l2: 0.765646, l3: 0.773499, l4: 0.784286, l5: 0.873062, l6: 0.968316

[epoch:  26/100, batch:   318/  792, ite: 47739] train loss: 5.292245, tar: 0.560437 
l0: 0.545729, l1: 0.544682, l2: 0.542748, l3: 0.563051, l4: 0.624194, l5: 0.693687, l6: 0.787413

[epoch:  26/100, batch:   320/  792, ite: 47740] train loss: 5.292167, tar: 0.56

[epoch:  26/100, batch:   400/  792, ite: 47780] train loss: 5.295982, tar: 0.560699 
l0: 0.619908, l1: 0.615256, l2: 0.611648, l3: 0.606021, l4: 0.649613, l5: 0.855485, l6: 1.089888

[epoch:  26/100, batch:   402/  792, ite: 47781] train loss: 5.296503, tar: 0.560732 
l0: 0.714112, l1: 0.717553, l2: 0.702385, l3: 0.702544, l4: 0.756500, l5: 0.809414, l6: 0.997640

[epoch:  26/100, batch:   404/  792, ite: 47782] train loss: 5.297104, tar: 0.560818 
l0: 0.357708, l1: 0.358509, l2: 0.360264, l3: 0.373575, l4: 0.393051, l5: 0.442666, l6: 0.536440

[epoch:  26/100, batch:   406/  792, ite: 47783] train loss: 5.296019, tar: 0.560704 
l0: 0.854693, l1: 0.851677, l2: 0.855450, l3: 0.860705, l4: 0.910413, l5: 0.980325, l6: 1.192764

[epoch:  26/100, batch:   408/  792, ite: 47784] train loss: 5.297391, tar: 0.560869 
l0: 0.355551, l1: 0.358285, l2: 0.360679, l3: 0.364844, l4: 0.405164, l5: 0.496206, l6: 0.753653

[epoch:  26/100, batch:   410/  792, ite: 47785] train loss: 5.296598, tar: 0.56

[epoch:  26/100, batch:   490/  792, ite: 47825] train loss: 5.300499, tar: 0.561243 
l0: 0.599331, l1: 0.596336, l2: 0.598718, l3: 0.602152, l4: 0.601014, l5: 0.578159, l6: 0.737731

[epoch:  26/100, batch:   492/  792, ite: 47826] train loss: 5.300400, tar: 0.561264 
l0: 0.338214, l1: 0.332490, l2: 0.338044, l3: 0.337282, l4: 0.366591, l5: 0.410792, l6: 0.549684

[epoch:  26/100, batch:   494/  792, ite: 47827] train loss: 5.299274, tar: 0.561142 
l0: 0.402264, l1: 0.406031, l2: 0.407722, l3: 0.425043, l4: 0.467838, l5: 0.572625, l6: 0.748033

[epoch:  26/100, batch:   496/  792, ite: 47828] train loss: 5.298719, tar: 0.561055 
l0: 0.551102, l1: 0.553220, l2: 0.542005, l3: 0.531171, l4: 0.533914, l5: 0.561436, l6: 0.628040

[epoch:  26/100, batch:   498/  792, ite: 47829] train loss: 5.298320, tar: 0.561050 
l0: 0.370402, l1: 0.379658, l2: 0.383044, l3: 0.404235, l4: 0.456985, l5: 0.569777, l6: 0.809221

[epoch:  26/100, batch:   500/  792, ite: 47830] train loss: 5.297726, tar: 0.56

[epoch:  26/100, batch:   580/  792, ite: 47870] train loss: 5.290535, tar: 0.560065 
l0: 0.382079, l1: 0.387737, l2: 0.389464, l3: 0.393726, l4: 0.406883, l5: 0.511627, l6: 0.720024

[epoch:  26/100, batch:   582/  792, ite: 47871] train loss: 5.289842, tar: 0.559970 
l0: 0.467780, l1: 0.472124, l2: 0.474359, l3: 0.488427, l4: 0.498192, l5: 0.534732, l6: 0.715608

[epoch:  26/100, batch:   584/  792, ite: 47872] train loss: 5.289390, tar: 0.559921 
l0: 0.418052, l1: 0.422046, l2: 0.421954, l3: 0.425578, l4: 0.456509, l5: 0.480431, l6: 0.683443

[epoch:  26/100, batch:   586/  792, ite: 47873] train loss: 5.288724, tar: 0.559845 
l0: 0.409386, l1: 0.412982, l2: 0.415600, l3: 0.424083, l4: 0.476582, l5: 0.633535, l6: 0.659260

[epoch:  26/100, batch:   588/  792, ite: 47874] train loss: 5.288077, tar: 0.559765 
l0: 0.837012, l1: 0.843022, l2: 0.838531, l3: 0.839381, l4: 0.854363, l5: 0.901687, l6: 1.098969

[epoch:  26/100, batch:   590/  792, ite: 47875] train loss: 5.289172, tar: 0.55

[epoch:  26/100, batch:   670/  792, ite: 47915] train loss: 5.286060, tar: 0.559541 
l0: 0.890496, l1: 0.894722, l2: 0.879295, l3: 0.883142, l4: 0.920478, l5: 0.958946, l6: 0.947892

[epoch:  26/100, batch:   672/  792, ite: 47916] train loss: 5.287089, tar: 0.559714 
l0: 0.658684, l1: 0.653436, l2: 0.654699, l3: 0.683149, l4: 0.725859, l5: 0.782261, l6: 0.956961

[epoch:  26/100, batch:   674/  792, ite: 47917] train loss: 5.287568, tar: 0.559765 
l0: 0.418655, l1: 0.416430, l2: 0.415748, l3: 0.413579, l4: 0.455568, l5: 0.578475, l6: 0.622065

[epoch:  26/100, batch:   676/  792, ite: 47918] train loss: 5.286892, tar: 0.559692 
l0: 0.926766, l1: 0.953354, l2: 0.945398, l3: 0.920652, l4: 0.964690, l5: 1.009284, l6: 1.016706

[epoch:  26/100, batch:   678/  792, ite: 47919] train loss: 5.288236, tar: 0.559883 
l0: 0.751671, l1: 0.758924, l2: 0.766599, l3: 0.770091, l4: 0.813740, l5: 1.003995, l6: 1.077861

[epoch:  26/100, batch:   680/  792, ite: 47920] train loss: 5.289255, tar: 0.55

[epoch:  26/100, batch:   760/  792, ite: 47960] train loss: 5.292146, tar: 0.560393 
l0: 1.369238, l1: 1.384964, l2: 1.380556, l3: 1.369184, l4: 1.456993, l5: 1.531034, l6: 1.590931

[epoch:  26/100, batch:   762/  792, ite: 47961] train loss: 5.295346, tar: 0.560806 
l0: 0.461602, l1: 0.468038, l2: 0.468300, l3: 0.473605, l4: 0.506475, l5: 0.647387, l6: 0.801342

[epoch:  26/100, batch:   764/  792, ite: 47962] train loss: 5.295039, tar: 0.560755 
l0: 0.398668, l1: 0.400908, l2: 0.399656, l3: 0.400059, l4: 0.413177, l5: 0.522818, l6: 0.565637

[epoch:  26/100, batch:   766/  792, ite: 47963] train loss: 5.294260, tar: 0.560673 
l0: 0.574617, l1: 0.575808, l2: 0.574458, l3: 0.565059, l4: 0.560264, l5: 0.631798, l6: 0.831366

[epoch:  26/100, batch:   768/  792, ite: 47964] train loss: 5.294242, tar: 0.560680 
l0: 0.518255, l1: 0.525739, l2: 0.522574, l3: 0.529693, l4: 0.560631, l5: 0.616459, l6: 0.942077

[epoch:  26/100, batch:   770/  792, ite: 47965] train loss: 5.294131, tar: 0.56

l0: 0.314085, l1: 0.315307, l2: 0.315755, l3: 0.317977, l4: 0.349022, l5: 0.531349, l6: 0.591521

[epoch:  27/100, batch:    58/  792, ite: 48005] train loss: 4.964139, tar: 0.505986 
l0: 0.334081, l1: 0.336053, l2: 0.331933, l3: 0.329696, l4: 0.361825, l5: 0.461680, l6: 0.565066

[epoch:  27/100, batch:    60/  792, ite: 48006] train loss: 4.696461, tar: 0.477335 
l0: 0.274563, l1: 0.281152, l2: 0.283680, l3: 0.291175, l4: 0.331483, l5: 0.499007, l6: 0.608960

[epoch:  27/100, batch:    62/  792, ite: 48007] train loss: 4.482725, tar: 0.448368 
l0: 0.368053, l1: 0.373872, l2: 0.371517, l3: 0.371084, l4: 0.392099, l5: 0.451664, l6: 0.594351

[epoch:  27/100, batch:    64/  792, ite: 48008] train loss: 4.366811, tar: 0.438328 
l0: 0.395373, l1: 0.396373, l2: 0.394396, l3: 0.398520, l4: 0.416693, l5: 0.566294, l6: 0.720970

[epoch:  27/100, batch:    66/  792, ite: 48009] train loss: 4.335615, tar: 0.433555 
l0: 0.441826, l1: 0.448904, l2: 0.449554, l3: 0.453134, l4: 0.505106, l5: 0.6078

[epoch:  27/100, batch:   146/  792, ite: 48049] train loss: 4.930963, tar: 0.518559 
l0: 0.342848, l1: 0.346346, l2: 0.347330, l3: 0.347991, l4: 0.390725, l5: 0.489481, l6: 0.604347

[epoch:  27/100, batch:   148/  792, ite: 48050] train loss: 4.903223, tar: 0.515045 
l0: 0.537354, l1: 0.541586, l2: 0.539304, l3: 0.546520, l4: 0.574380, l5: 0.687387, l6: 0.843646

[epoch:  27/100, batch:   150/  792, ite: 48051] train loss: 4.908003, tar: 0.515482 
l0: 0.510368, l1: 0.510472, l2: 0.519470, l3: 0.537734, l4: 0.556025, l5: 0.631430, l6: 0.753888

[epoch:  27/100, batch:   152/  792, ite: 48052] train loss: 4.906119, tar: 0.515384 
l0: 0.519433, l1: 0.524171, l2: 0.526058, l3: 0.543891, l4: 0.564344, l5: 0.674104, l6: 0.762709

[epoch:  27/100, batch:   154/  792, ite: 48053] train loss: 4.908541, tar: 0.515460 
l0: 0.406955, l1: 0.408117, l2: 0.410976, l3: 0.418795, l4: 0.462611, l5: 0.676455, l6: 0.807516

[epoch:  27/100, batch:   156/  792, ite: 48054] train loss: 4.898654, tar: 0.51

[epoch:  27/100, batch:   236/  792, ite: 48094] train loss: 4.851981, tar: 0.504639 
l0: 0.470030, l1: 0.473356, l2: 0.476673, l3: 0.484813, l4: 0.527186, l5: 0.606982, l6: 0.848717

[epoch:  27/100, batch:   238/  792, ite: 48095] train loss: 4.850593, tar: 0.504274 
l0: 0.755558, l1: 0.758487, l2: 0.765797, l3: 0.772588, l4: 0.777458, l5: 0.823780, l6: 0.955651

[epoch:  27/100, batch:   240/  792, ite: 48096] train loss: 4.869484, tar: 0.506892 
l0: 0.516549, l1: 0.519333, l2: 0.519531, l3: 0.524921, l4: 0.535439, l5: 0.613939, l6: 0.951129

[epoch:  27/100, batch:   242/  792, ite: 48097] train loss: 4.875120, tar: 0.506991 
l0: 0.333978, l1: 0.335054, l2: 0.336640, l3: 0.351110, l4: 0.384181, l5: 0.517580, l6: 0.790036

[epoch:  27/100, batch:   244/  792, ite: 48098] train loss: 4.865640, tar: 0.505226 
l0: 0.205179, l1: 0.214682, l2: 0.213813, l3: 0.220271, l4: 0.245990, l5: 0.313000, l6: 0.439137

[epoch:  27/100, batch:   246/  792, ite: 48099] train loss: 4.840573, tar: 0.50

[epoch:  27/100, batch:   326/  792, ite: 48139] train loss: 5.124574, tar: 0.539766 
l0: 0.973628, l1: 0.977205, l2: 0.978544, l3: 0.981560, l4: 0.990387, l5: 0.972641, l6: 1.267769

[epoch:  27/100, batch:   328/  792, ite: 48140] train loss: 5.148709, tar: 0.542865 
l0: 0.425636, l1: 0.425453, l2: 0.429054, l3: 0.424855, l4: 0.438239, l5: 0.529024, l6: 0.598308

[epoch:  27/100, batch:   330/  792, ite: 48141] train loss: 5.139969, tar: 0.542033 
l0: 0.667610, l1: 0.671648, l2: 0.670832, l3: 0.680038, l4: 0.697275, l5: 0.647935, l6: 0.658070

[epoch:  27/100, batch:   332/  792, ite: 48142] train loss: 5.140850, tar: 0.542917 
l0: 0.908541, l1: 0.916482, l2: 0.911516, l3: 0.904068, l4: 0.942987, l5: 1.101199, l6: 1.362643

[epoch:  27/100, batch:   334/  792, ite: 48143] train loss: 5.164050, tar: 0.545474 
l0: 0.739889, l1: 0.728921, l2: 0.736124, l3: 0.738195, l4: 0.729442, l5: 0.779082, l6: 0.907183

[epoch:  27/100, batch:   336/  792, ite: 48144] train loss: 5.172909, tar: 0.54

[epoch:  27/100, batch:   416/  792, ite: 48184] train loss: 5.154852, tar: 0.541952 
l0: 0.497483, l1: 0.498041, l2: 0.498809, l3: 0.506642, l4: 0.530936, l5: 0.602961, l6: 0.784633

[epoch:  27/100, batch:   418/  792, ite: 48185] train loss: 5.152825, tar: 0.541712 
l0: 0.951489, l1: 0.963448, l2: 0.961159, l3: 0.950592, l4: 0.974106, l5: 1.015357, l6: 1.033767

[epoch:  27/100, batch:   420/  792, ite: 48186] train loss: 5.167985, tar: 0.543915 
l0: 0.357258, l1: 0.358968, l2: 0.359298, l3: 0.364897, l4: 0.389112, l5: 0.472590, l6: 0.663046

[epoch:  27/100, batch:   422/  792, ite: 48187] train loss: 5.160313, tar: 0.542917 
l0: 0.338619, l1: 0.338217, l2: 0.342709, l3: 0.375393, l4: 0.418164, l5: 0.509055, l6: 0.626258

[epoch:  27/100, batch:   424/  792, ite: 48188] train loss: 5.152615, tar: 0.541830 
l0: 0.350368, l1: 0.359617, l2: 0.363158, l3: 0.371139, l4: 0.465445, l5: 0.595710, l6: 0.749906

[epoch:  27/100, batch:   426/  792, ite: 48189] train loss: 5.146494, tar: 0.54

[epoch:  27/100, batch:   506/  792, ite: 48229] train loss: 5.108961, tar: 0.536616 
l0: 0.820287, l1: 0.817191, l2: 0.815987, l3: 0.828478, l4: 0.862093, l5: 0.841801, l6: 1.066100

[epoch:  27/100, batch:   508/  792, ite: 48230] train loss: 5.117915, tar: 0.537849 
l0: 0.569456, l1: 0.570933, l2: 0.570342, l3: 0.571006, l4: 0.569516, l5: 0.707486, l6: 0.993627

[epoch:  27/100, batch:   510/  792, ite: 48231] train loss: 5.120134, tar: 0.537986 
l0: 0.763077, l1: 0.771781, l2: 0.769543, l3: 0.779708, l4: 0.844580, l5: 1.013477, l6: 1.176481

[epoch:  27/100, batch:   512/  792, ite: 48232] train loss: 5.130869, tar: 0.538956 
l0: 0.539235, l1: 0.545073, l2: 0.550938, l3: 0.557094, l4: 0.579763, l5: 0.701638, l6: 1.199954

[epoch:  27/100, batch:   514/  792, ite: 48233] train loss: 5.135046, tar: 0.538958 
l0: 0.596932, l1: 0.605209, l2: 0.605110, l3: 0.612570, l4: 0.649388, l5: 0.790512, l6: 0.910870

[epoch:  27/100, batch:   516/  792, ite: 48234] train loss: 5.137933, tar: 0.53

[epoch:  27/100, batch:   596/  792, ite: 48274] train loss: 5.121624, tar: 0.537045 
l0: 0.365140, l1: 0.359878, l2: 0.360465, l3: 0.363952, l4: 0.374677, l5: 0.513580, l6: 0.659765

[epoch:  27/100, batch:   598/  792, ite: 48275] train loss: 5.116280, tar: 0.536420 
l0: 0.430680, l1: 0.430421, l2: 0.430574, l3: 0.431417, l4: 0.439259, l5: 0.495226, l6: 0.657519

[epoch:  27/100, batch:   600/  792, ite: 48276] train loss: 5.112416, tar: 0.536037 
l0: 0.448994, l1: 0.457398, l2: 0.464947, l3: 0.476984, l4: 0.515042, l5: 0.614863, l6: 0.724912

[epoch:  27/100, batch:   602/  792, ite: 48277] train loss: 5.110200, tar: 0.535723 
l0: 0.413367, l1: 0.413612, l2: 0.412864, l3: 0.415893, l4: 0.448620, l5: 0.553247, l6: 0.761964

[epoch:  27/100, batch:   604/  792, ite: 48278] train loss: 5.107161, tar: 0.535283 
l0: 0.386932, l1: 0.391112, l2: 0.387241, l3: 0.397470, l4: 0.412137, l5: 0.500794, l6: 0.536348

[epoch:  27/100, batch:   606/  792, ite: 48279] train loss: 5.102076, tar: 0.53

[epoch:  27/100, batch:   686/  792, ite: 48319] train loss: 5.107730, tar: 0.536154 
l0: 0.707379, l1: 0.701470, l2: 0.701175, l3: 0.701506, l4: 0.723716, l5: 0.839129, l6: 1.011788

[epoch:  27/100, batch:   688/  792, ite: 48320] train loss: 5.112041, tar: 0.536689 
l0: 1.249068, l1: 1.252990, l2: 1.253273, l3: 1.249502, l4: 1.224615, l5: 1.341733, l6: 1.367488

[epoch:  27/100, batch:   690/  792, ite: 48321] train loss: 5.127942, tar: 0.538908 
l0: 0.673471, l1: 0.675058, l2: 0.675742, l3: 0.676735, l4: 0.684191, l5: 0.672746, l6: 0.864334

[epoch:  27/100, batch:   692/  792, ite: 48322] train loss: 5.129945, tar: 0.539326 
l0: 0.789648, l1: 0.783470, l2: 0.778096, l3: 0.771500, l4: 0.774862, l5: 0.841296, l6: 0.976489

[epoch:  27/100, batch:   694/  792, ite: 48323] train loss: 5.134699, tar: 0.540101 
l0: 0.452085, l1: 0.456490, l2: 0.456176, l3: 0.460594, l4: 0.501494, l5: 0.614472, l6: 0.775645

[epoch:  27/100, batch:   696/  792, ite: 48324] train loss: 5.132546, tar: 0.53

[epoch:  27/100, batch:   776/  792, ite: 48364] train loss: 5.187271, tar: 0.546410 
l0: 0.783948, l1: 0.794578, l2: 0.787513, l3: 0.779696, l4: 0.791498, l5: 0.916245, l6: 1.337308

[epoch:  27/100, batch:   778/  792, ite: 48365] train loss: 5.194079, tar: 0.547061 
l0: 0.841918, l1: 0.846082, l2: 0.850252, l3: 0.840924, l4: 0.893186, l5: 1.015603, l6: 1.148829

[epoch:  27/100, batch:   780/  792, ite: 48366] train loss: 5.201096, tar: 0.547866 
l0: 0.521533, l1: 0.531788, l2: 0.529283, l3: 0.528158, l4: 0.542507, l5: 0.629630, l6: 0.815252

[epoch:  27/100, batch:   782/  792, ite: 48367] train loss: 5.200673, tar: 0.547794 
l0: 0.590279, l1: 0.592698, l2: 0.597965, l3: 0.600964, l4: 0.598297, l5: 0.678018, l6: 0.919923

[epoch:  27/100, batch:   784/  792, ite: 48368] train loss: 5.201555, tar: 0.547910 
l0: 0.551834, l1: 0.552454, l2: 0.554421, l3: 0.547321, l4: 0.569684, l5: 0.669826, l6: 0.829522

[epoch:  27/100, batch:   786/  792, ite: 48369] train loss: 5.201536, tar: 0.54

l0: 0.630733, l1: 0.630715, l2: 0.630993, l3: 0.631006, l4: 0.646269, l5: 0.680531, l6: 0.772966

[epoch:  28/100, batch:    74/  792, ite: 48409] train loss: 5.242975, tar: 0.552528 
l0: 0.456185, l1: 0.458008, l2: 0.458763, l3: 0.469113, l4: 0.516922, l5: 0.592834, l6: 0.717499

[epoch:  28/100, batch:    76/  792, ite: 48410] train loss: 5.240988, tar: 0.552293 
l0: 1.114756, l1: 1.124564, l2: 1.122801, l3: 1.118535, l4: 1.147020, l5: 1.177334, l6: 1.216437

[epoch:  28/100, batch:    78/  792, ite: 48411] train loss: 5.251039, tar: 0.553662 
l0: 0.410819, l1: 0.413550, l2: 0.414035, l3: 0.422709, l4: 0.432233, l5: 0.537895, l6: 0.646268

[epoch:  28/100, batch:    80/  792, ite: 48412] train loss: 5.248033, tar: 0.553315 
l0: 0.530621, l1: 0.533418, l2: 0.537258, l3: 0.538417, l4: 0.591025, l5: 0.656402, l6: 0.854551

[epoch:  28/100, batch:    82/  792, ite: 48413] train loss: 5.247892, tar: 0.553260 
l0: 0.661400, l1: 0.661756, l2: 0.666060, l3: 0.660923, l4: 0.714661, l5: 0.8170

[epoch:  28/100, batch:   162/  792, ite: 48453] train loss: 5.231804, tar: 0.551859 
l0: 0.507704, l1: 0.519131, l2: 0.521608, l3: 0.535031, l4: 0.565786, l5: 0.660915, l6: 0.722753

[epoch:  28/100, batch:   164/  792, ite: 48454] train loss: 5.230808, tar: 0.551761 
l0: 0.337903, l1: 0.341355, l2: 0.343961, l3: 0.347984, l4: 0.406801, l5: 0.586854, l6: 0.692168

[epoch:  28/100, batch:   166/  792, ite: 48455] train loss: 5.227576, tar: 0.551291 
l0: 0.649798, l1: 0.650421, l2: 0.649656, l3: 0.654080, l4: 0.681168, l5: 0.751791, l6: 1.055124

[epoch:  28/100, batch:   168/  792, ite: 48456] train loss: 5.229674, tar: 0.551507 
l0: 1.146010, l1: 1.154996, l2: 1.148182, l3: 1.141537, l4: 1.161415, l5: 1.186248, l6: 1.421718

[epoch:  28/100, batch:   170/  792, ite: 48457] train loss: 5.239740, tar: 0.552808 
l0: 0.529881, l1: 0.531698, l2: 0.529489, l3: 0.532935, l4: 0.550641, l5: 0.591687, l6: 0.729633

[epoch:  28/100, batch:   172/  792, ite: 48458] train loss: 5.238589, tar: 0.55

[epoch:  28/100, batch:   252/  792, ite: 48498] train loss: 5.229323, tar: 0.551376 
l0: 0.524978, l1: 0.533064, l2: 0.527349, l3: 0.524792, l4: 0.522597, l5: 0.580132, l6: 0.741092

[epoch:  28/100, batch:   254/  792, ite: 48499] train loss: 5.228608, tar: 0.551323 
l0: 0.668815, l1: 0.670642, l2: 0.662996, l3: 0.679359, l4: 0.750805, l5: 0.752010, l6: 1.077463

[epoch:  28/100, batch:   256/  792, ite: 48500] train loss: 5.231034, tar: 0.551558 
l0: 0.217725, l1: 0.225838, l2: 0.226499, l3: 0.232989, l4: 0.259141, l5: 0.328328, l6: 0.397841

[epoch:  28/100, batch:   258/  792, ite: 48501] train loss: 5.225191, tar: 0.550892 
l0: 0.544601, l1: 0.546484, l2: 0.547594, l3: 0.555205, l4: 0.564947, l5: 0.652839, l6: 0.812408

[epoch:  28/100, batch:   260/  792, ite: 48502] train loss: 5.225093, tar: 0.550879 
l0: 0.457728, l1: 0.464837, l2: 0.463181, l3: 0.460061, l4: 0.485327, l5: 0.684319, l6: 0.849373

[epoch:  28/100, batch:   262/  792, ite: 48503] train loss: 5.224180, tar: 0.55

[epoch:  28/100, batch:   342/  792, ite: 48543] train loss: 5.197008, tar: 0.547580 
l0: 0.842319, l1: 0.856065, l2: 0.847836, l3: 0.842876, l4: 0.857418, l5: 0.894625, l6: 0.969895

[epoch:  28/100, batch:   344/  792, ite: 48544] train loss: 5.200591, tar: 0.548122 
l0: 0.267252, l1: 0.265492, l2: 0.263874, l3: 0.267281, l4: 0.323750, l5: 0.364709, l6: 0.567253

[epoch:  28/100, batch:   346/  792, ite: 48545] train loss: 5.196402, tar: 0.547606 
l0: 0.492658, l1: 0.491430, l2: 0.489477, l3: 0.500795, l4: 0.537100, l5: 0.680513, l6: 0.874410

[epoch:  28/100, batch:   348/  792, ite: 48546] train loss: 5.196060, tar: 0.547506 
l0: 0.445230, l1: 0.452274, l2: 0.452160, l3: 0.450611, l4: 0.473713, l5: 0.535961, l6: 0.797083

[epoch:  28/100, batch:   350/  792, ite: 48547] train loss: 5.194737, tar: 0.547319 
l0: 0.431210, l1: 0.435084, l2: 0.439888, l3: 0.448615, l4: 0.476623, l5: 0.598176, l6: 0.809133

[epoch:  28/100, batch:   352/  792, ite: 48548] train loss: 5.193660, tar: 0.54

[epoch:  28/100, batch:   432/  792, ite: 48588] train loss: 5.216743, tar: 0.549562 
l0: 0.482590, l1: 0.484399, l2: 0.480600, l3: 0.478315, l4: 0.508980, l5: 0.619686, l6: 0.708138

[epoch:  28/100, batch:   434/  792, ite: 48589] train loss: 5.215652, tar: 0.549449 
l0: 0.433658, l1: 0.436139, l2: 0.436261, l3: 0.442280, l4: 0.485846, l5: 0.563199, l6: 0.741309

[epoch:  28/100, batch:   436/  792, ite: 48590] train loss: 5.214160, tar: 0.549252 
l0: 0.352814, l1: 0.354505, l2: 0.357221, l3: 0.359975, l4: 0.354337, l5: 0.495511, l6: 0.576589

[epoch:  28/100, batch:   438/  792, ite: 48591] train loss: 5.211263, tar: 0.548920 
l0: 0.461715, l1: 0.464005, l2: 0.461419, l3: 0.459218, l4: 0.492658, l5: 0.569318, l6: 0.681148

[epoch:  28/100, batch:   440/  792, ite: 48592] train loss: 5.209772, tar: 0.548773 
l0: 0.531034, l1: 0.535793, l2: 0.529407, l3: 0.521075, l4: 0.533637, l5: 0.575471, l6: 0.715709

[epoch:  28/100, batch:   442/  792, ite: 48593] train loss: 5.208863, tar: 0.54

[epoch:  28/100, batch:   522/  792, ite: 48633] train loss: 5.206379, tar: 0.548307 
l0: 0.767430, l1: 0.843295, l2: 0.804718, l3: 0.812946, l4: 0.863079, l5: 1.016271, l6: 1.438246

[epoch:  28/100, batch:   524/  792, ite: 48634] train loss: 5.211243, tar: 0.548653 
l0: 0.338353, l1: 0.339570, l2: 0.337387, l3: 0.350201, l4: 0.373006, l5: 0.449555, l6: 0.590929

[epoch:  28/100, batch:   526/  792, ite: 48635] train loss: 5.208406, tar: 0.548322 
l0: 0.473970, l1: 0.475516, l2: 0.474269, l3: 0.480559, l4: 0.497456, l5: 0.657163, l6: 0.842844

[epoch:  28/100, batch:   528/  792, ite: 48636] train loss: 5.207818, tar: 0.548205 
l0: 0.502967, l1: 0.508941, l2: 0.508906, l3: 0.498241, l4: 0.502175, l5: 0.619140, l6: 0.773415

[epoch:  28/100, batch:   530/  792, ite: 48637] train loss: 5.207213, tar: 0.548134 
l0: 0.370897, l1: 0.380935, l2: 0.382036, l3: 0.398526, l4: 0.421029, l5: 0.575571, l6: 0.821670

[epoch:  28/100, batch:   532/  792, ite: 48638] train loss: 5.205676, tar: 0.54

[epoch:  28/100, batch:   612/  792, ite: 48678] train loss: 5.190672, tar: 0.546135 
l0: 0.921448, l1: 0.930497, l2: 0.925668, l3: 0.918282, l4: 0.957992, l5: 1.035999, l6: 1.069279

[epoch:  28/100, batch:   614/  792, ite: 48679] train loss: 5.194691, tar: 0.546688 
l0: 0.648620, l1: 0.645909, l2: 0.647868, l3: 0.648660, l4: 0.641454, l5: 0.676148, l6: 0.773860

[epoch:  28/100, batch:   616/  792, ite: 48680] train loss: 5.195020, tar: 0.546838 
l0: 0.390979, l1: 0.399538, l2: 0.399796, l3: 0.403332, l4: 0.434996, l5: 0.489534, l6: 0.669125

[epoch:  28/100, batch:   618/  792, ite: 48681] train loss: 5.193160, tar: 0.546609 
l0: 0.315856, l1: 0.315958, l2: 0.318409, l3: 0.324009, l4: 0.393656, l5: 0.486169, l6: 0.713617

[epoch:  28/100, batch:   620/  792, ite: 48682] train loss: 5.190937, tar: 0.546271 
l0: 0.291352, l1: 0.296535, l2: 0.289809, l3: 0.291894, l4: 0.293219, l5: 0.361409, l6: 0.470138

[epoch:  28/100, batch:   622/  792, ite: 48683] train loss: 5.187533, tar: 0.54

[epoch:  28/100, batch:   702/  792, ite: 48723] train loss: 5.169558, tar: 0.544071 
l0: 0.306828, l1: 0.311735, l2: 0.308544, l3: 0.321625, l4: 0.355958, l5: 0.503643, l6: 0.608676

[epoch:  28/100, batch:   704/  792, ite: 48724] train loss: 5.167102, tar: 0.543743 
l0: 2.136014, l1: 2.175374, l2: 2.161964, l3: 2.151333, l4: 2.330053, l5: 2.444053, l6: 2.635154

[epoch:  28/100, batch:   706/  792, ite: 48725] train loss: 5.185638, tar: 0.545939 
l0: 0.881374, l1: 0.891230, l2: 0.892229, l3: 0.898774, l4: 0.924017, l5: 1.078284, l6: 1.355150

[epoch:  28/100, batch:   708/  792, ite: 48726] train loss: 5.189882, tar: 0.546401 
l0: 0.939721, l1: 0.932223, l2: 0.929049, l3: 0.938535, l4: 0.924816, l5: 0.930159, l6: 0.808338

[epoch:  28/100, batch:   710/  792, ite: 48727] train loss: 5.192801, tar: 0.546942 
l0: 0.612939, l1: 0.614198, l2: 0.613856, l3: 0.611724, l4: 0.621770, l5: 0.697991, l6: 0.828630

[epoch:  28/100, batch:   712/  792, ite: 48728] train loss: 5.193165, tar: 0.54

[epoch:  28/100, batch:   792/  792, ite: 48768] train loss: 5.194418, tar: 0.546984 
Starting epoch 29
Epoch 29 loading complete
l0: 0.821541, l1: 0.808970, l2: 0.808451, l3: 0.817257, l4: 0.828250, l5: 0.810037, l6: 0.835050

[epoch:  29/100, batch:     2/  792, ite: 48769] train loss: 5.196365, tar: 0.547341 
l0: 0.431308, l1: 0.433651, l2: 0.435271, l3: 0.431918, l4: 0.459709, l5: 0.608722, l6: 0.829638

[epoch:  29/100, batch:     4/  792, ite: 48770] train loss: 5.195414, tar: 0.547190 
l0: 0.359617, l1: 0.362900, l2: 0.364015, l3: 0.369170, l4: 0.386282, l5: 0.541713, l6: 0.717290

[epoch:  29/100, batch:     6/  792, ite: 48771] train loss: 5.193621, tar: 0.546947 
l0: 0.767584, l1: 0.772785, l2: 0.771799, l3: 0.777336, l4: 0.823737, l5: 0.946861, l6: 1.116377

[epoch:  29/100, batch:     8/  792, ite: 48772] train loss: 5.196005, tar: 0.547233 
l0: 0.721756, l1: 0.740545, l2: 0.729888, l3: 0.737689, l4: 0.738695, l5: 0.857744, l6: 1.006362

[epoch:  29/100, batch:    10/  792,

l0: 0.520871, l1: 0.522270, l2: 0.520959, l3: 0.523148, l4: 0.542782, l5: 0.658806, l6: 0.817603

[epoch:  29/100, batch:    90/  792, ite: 48813] train loss: 5.187184, tar: 0.546338 
l0: 0.341801, l1: 0.349110, l2: 0.347389, l3: 0.340226, l4: 0.369672, l5: 0.531174, l6: 0.688843

[epoch:  29/100, batch:    92/  792, ite: 48814] train loss: 5.185357, tar: 0.546086 
l0: 0.713489, l1: 0.712297, l2: 0.717093, l3: 0.716670, l4: 0.741255, l5: 0.798475, l6: 0.986741

[epoch:  29/100, batch:    94/  792, ite: 48815] train loss: 5.186878, tar: 0.546292 
l0: 1.079301, l1: 1.083996, l2: 1.080887, l3: 1.093071, l4: 1.165896, l5: 1.240618, l6: 1.229534

[epoch:  29/100, batch:    96/  792, ite: 48816] train loss: 5.191842, tar: 0.546945 
l0: 0.246849, l1: 0.249325, l2: 0.250332, l3: 0.256910, l4: 0.278299, l5: 0.398723, l6: 0.539212

[epoch:  29/100, batch:    98/  792, ite: 48817] train loss: 5.188902, tar: 0.546578 
l0: 2.712685, l1: 2.732676, l2: 2.660352, l3: 2.623995, l4: 2.940932, l5: 3.2501

[epoch:  29/100, batch:   178/  792, ite: 48857] train loss: 5.219047, tar: 0.550261 
l0: 1.194923, l1: 1.229467, l2: 1.216895, l3: 1.221275, l4: 1.242830, l5: 1.203708, l6: 1.499796

[epoch:  29/100, batch:   180/  792, ite: 48858] train loss: 5.225054, tar: 0.551012 
l0: 0.462091, l1: 0.473560, l2: 0.468493, l3: 0.469588, l4: 0.495345, l5: 0.623649, l6: 0.885032

[epoch:  29/100, batch:   182/  792, ite: 48859] train loss: 5.224504, tar: 0.550908 
l0: 0.423438, l1: 0.427530, l2: 0.433420, l3: 0.454386, l4: 0.523134, l5: 0.643107, l6: 0.793070

[epoch:  29/100, batch:   184/  792, ite: 48860] train loss: 5.223724, tar: 0.550760 
l0: 0.206638, l1: 0.213766, l2: 0.214173, l3: 0.223280, l4: 0.263622, l5: 0.355745, l6: 0.437669

[epoch:  29/100, batch:   186/  792, ite: 48861] train loss: 5.220388, tar: 0.550361 
l0: 0.273688, l1: 0.280299, l2: 0.281657, l3: 0.282870, l4: 0.307881, l5: 0.379879, l6: 0.469405

[epoch:  29/100, batch:   188/  792, ite: 48862] train loss: 5.217576, tar: 0.55

[epoch:  29/100, batch:   268/  792, ite: 48902] train loss: 5.208664, tar: 0.548779 
l0: 0.623689, l1: 0.629124, l2: 0.629205, l3: 0.626277, l4: 0.630042, l5: 0.666928, l6: 0.675882

[epoch:  29/100, batch:   270/  792, ite: 48903] train loss: 5.208600, tar: 0.548862 
l0: 0.477386, l1: 0.476106, l2: 0.472223, l3: 0.481592, l4: 0.552570, l5: 0.691144, l6: 0.923444

[epoch:  29/100, batch:   272/  792, ite: 48904] train loss: 5.208493, tar: 0.548783 
l0: 0.616658, l1: 0.621726, l2: 0.618633, l3: 0.623990, l4: 0.641650, l5: 0.643429, l6: 0.779398

[epoch:  29/100, batch:   274/  792, ite: 48905] train loss: 5.208808, tar: 0.548858 
l0: 0.648942, l1: 0.656255, l2: 0.651096, l3: 0.682797, l4: 0.724133, l5: 0.924396, l6: 1.182374

[epoch:  29/100, batch:   276/  792, ite: 48906] train loss: 5.210577, tar: 0.548968 
l0: 0.632509, l1: 0.629760, l2: 0.626719, l3: 0.627617, l4: 0.652816, l5: 0.775019, l6: 0.808656

[epoch:  29/100, batch:   278/  792, ite: 48907] train loss: 5.211073, tar: 0.54

[epoch:  29/100, batch:   358/  792, ite: 48947] train loss: 5.204849, tar: 0.548454 
l0: 0.392687, l1: 0.393392, l2: 0.396341, l3: 0.413946, l4: 0.421353, l5: 0.480348, l6: 0.677031

[epoch:  29/100, batch:   360/  792, ite: 48948] train loss: 5.203561, tar: 0.548290 
l0: 0.522842, l1: 0.526520, l2: 0.527946, l3: 0.531694, l4: 0.567929, l5: 0.613934, l6: 0.879635

[epoch:  29/100, batch:   362/  792, ite: 48949] train loss: 5.203312, tar: 0.548263 
l0: 0.397282, l1: 0.395852, l2: 0.396491, l3: 0.410756, l4: 0.444984, l5: 0.568599, l6: 0.570485

[epoch:  29/100, batch:   364/  792, ite: 48950] train loss: 5.201771, tar: 0.548104 
l0: 0.278433, l1: 0.283247, l2: 0.283574, l3: 0.284910, l4: 0.327761, l5: 0.466125, l6: 0.615994

[epoch:  29/100, batch:   366/  792, ite: 48951] train loss: 5.199677, tar: 0.547820 
l0: 0.423237, l1: 0.429435, l2: 0.427898, l3: 0.423855, l4: 0.435302, l5: 0.476090, l6: 0.672490

[epoch:  29/100, batch:   368/  792, ite: 48952] train loss: 5.198455, tar: 0.54

[epoch:  29/100, batch:   448/  792, ite: 48992] train loss: 5.184374, tar: 0.545667 
l0: 0.560390, l1: 0.553036, l2: 0.548787, l3: 0.547253, l4: 0.597875, l5: 0.729443, l6: 0.848782

[epoch:  29/100, batch:   450/  792, ite: 48993] train loss: 5.184571, tar: 0.545682 
l0: 0.735496, l1: 0.738705, l2: 0.736033, l3: 0.732972, l4: 0.766150, l5: 0.787760, l6: 0.772363

[epoch:  29/100, batch:   452/  792, ite: 48994] train loss: 5.185545, tar: 0.545873 
l0: 0.982929, l1: 0.984382, l2: 0.989477, l3: 1.005448, l4: 0.998692, l5: 1.064997, l6: 1.131451

[epoch:  29/100, batch:   454/  792, ite: 48995] train loss: 5.188652, tar: 0.546312 
l0: 0.291170, l1: 0.289549, l2: 0.287273, l3: 0.293548, l4: 0.304688, l5: 0.397676, l6: 0.479909

[epoch:  29/100, batch:   456/  792, ite: 48996] train loss: 5.186318, tar: 0.546056 
l0: 1.048748, l1: 1.051842, l2: 1.050312, l3: 1.068258, l4: 1.114368, l5: 1.119427, l6: 1.196680

[epoch:  29/100, batch:   458/  792, ite: 48997] train loss: 5.190018, tar: 0.54

[epoch:  29/100, batch:   538/  792, ite: 49037] train loss: 5.172614, tar: 0.544775 
l0: 0.834714, l1: 0.838589, l2: 0.840551, l3: 0.845252, l4: 0.870182, l5: 0.937482, l6: 1.088828

[epoch:  29/100, batch:   540/  792, ite: 49038] train loss: 5.174847, tar: 0.545054 
l0: 0.523261, l1: 0.533840, l2: 0.539702, l3: 0.563773, l4: 0.572136, l5: 0.710053, l6: 0.774040

[epoch:  29/100, batch:   542/  792, ite: 49039] train loss: 5.174687, tar: 0.545033 
l0: 0.705383, l1: 0.714406, l2: 0.713610, l3: 0.724339, l4: 0.759462, l5: 0.832995, l6: 0.867467

[epoch:  29/100, batch:   544/  792, ite: 49040] train loss: 5.175655, tar: 0.545187 
l0: 0.703751, l1: 0.692698, l2: 0.699586, l3: 0.710950, l4: 0.741450, l5: 0.895268, l6: 1.106368

[epoch:  29/100, batch:   546/  792, ite: 49041] train loss: 5.177071, tar: 0.545340 
l0: 1.226703, l1: 1.262784, l2: 1.283556, l3: 1.292597, l4: 1.272041, l5: 1.198011, l6: 1.332393

[epoch:  29/100, batch:   548/  792, ite: 49042] train loss: 5.182156, tar: 0.54

[epoch:  29/100, batch:   628/  792, ite: 49082] train loss: 5.177201, tar: 0.544981 
l0: 0.425396, l1: 0.425957, l2: 0.426705, l3: 0.440294, l4: 0.479831, l5: 0.580478, l6: 0.644172

[epoch:  29/100, batch:   630/  792, ite: 49083] train loss: 5.176243, tar: 0.544870 
l0: 0.198369, l1: 0.198235, l2: 0.195507, l3: 0.197347, l4: 0.243867, l5: 0.333898, l6: 0.464081

[epoch:  29/100, batch:   632/  792, ite: 49084] train loss: 5.173641, tar: 0.544551 
l0: 1.015238, l1: 1.042456, l2: 1.046533, l3: 1.059297, l4: 1.110065, l5: 1.108136, l6: 1.189318

[epoch:  29/100, batch:   634/  792, ite: 49085] train loss: 5.177014, tar: 0.544984 
l0: 0.530656, l1: 0.532381, l2: 0.533053, l3: 0.535087, l4: 0.586852, l5: 0.722616, l6: 0.927196

[epoch:  29/100, batch:   636/  792, ite: 49086] train loss: 5.177117, tar: 0.544971 
l0: 0.289982, l1: 0.293965, l2: 0.294763, l3: 0.303234, l4: 0.311098, l5: 0.409177, l6: 0.625393

[epoch:  29/100, batch:   638/  792, ite: 49087] train loss: 5.175301, tar: 0.54

[epoch:  29/100, batch:   718/  792, ite: 49127] train loss: 5.186529, tar: 0.546064 
l0: 0.401942, l1: 0.407305, l2: 0.406768, l3: 0.422331, l4: 0.491962, l5: 0.623716, l6: 0.964423

[epoch:  29/100, batch:   720/  792, ite: 49128] train loss: 5.186150, tar: 0.545936 
l0: 0.446773, l1: 0.451813, l2: 0.448275, l3: 0.447700, l4: 0.468444, l5: 0.596087, l6: 0.757842

[epoch:  29/100, batch:   722/  792, ite: 49129] train loss: 5.185487, tar: 0.545848 
l0: 0.479658, l1: 0.478841, l2: 0.476720, l3: 0.483774, l4: 0.486629, l5: 0.613028, l6: 0.903403

[epoch:  29/100, batch:   724/  792, ite: 49130] train loss: 5.185249, tar: 0.545790 
l0: 0.527007, l1: 0.534170, l2: 0.534889, l3: 0.540062, l4: 0.554850, l5: 0.575683, l6: 0.654971

[epoch:  29/100, batch:   726/  792, ite: 49131] train loss: 5.184754, tar: 0.545773 
l0: 0.610076, l1: 0.618224, l2: 0.619335, l3: 0.611989, l4: 0.626850, l5: 0.767322, l6: 1.018605

[epoch:  29/100, batch:   728/  792, ite: 49132] train loss: 5.185377, tar: 0.54

l0: 0.532722, l1: 0.540294, l2: 0.537967, l3: 0.536477, l4: 0.558548, l5: 0.704522, l6: 1.028844

[epoch:  30/100, batch:    16/  792, ite: 49172] train loss: 5.180280, tar: 0.544785 
l0: 0.692087, l1: 0.690380, l2: 0.690827, l3: 0.693255, l4: 0.757667, l5: 0.806147, l6: 0.975846

[epoch:  30/100, batch:    18/  792, ite: 49173] train loss: 5.181380, tar: 0.544911 
l0: 0.240179, l1: 0.240839, l2: 0.237215, l3: 0.231513, l4: 0.263237, l5: 0.317900, l6: 0.477996

[epoch:  30/100, batch:    20/  792, ite: 49174] train loss: 5.179053, tar: 0.544651 
l0: 0.366264, l1: 0.373435, l2: 0.371266, l3: 0.371883, l4: 0.392685, l5: 0.495343, l6: 0.623912

[epoch:  30/100, batch:    22/  792, ite: 49175] train loss: 5.177742, tar: 0.544499 
l0: 0.471637, l1: 0.478275, l2: 0.478836, l3: 0.482032, l4: 0.499141, l5: 0.623944, l6: 0.817151

[epoch:  30/100, batch:    24/  792, ite: 49176] train loss: 5.177384, tar: 0.544437 
l0: 0.567744, l1: 0.574880, l2: 0.573236, l3: 0.571904, l4: 0.604246, l5: 0.6945

[epoch:  30/100, batch:   104/  792, ite: 49216] train loss: 5.177763, tar: 0.544418 
l0: 0.490435, l1: 0.495108, l2: 0.491812, l3: 0.489812, l4: 0.508660, l5: 0.578272, l6: 0.628113

[epoch:  30/100, batch:   106/  792, ite: 49217] train loss: 5.177095, tar: 0.544374 
l0: 0.551817, l1: 0.548372, l2: 0.551503, l3: 0.547466, l4: 0.589549, l5: 0.640516, l6: 0.830702

[epoch:  30/100, batch:   108/  792, ite: 49218] train loss: 5.177134, tar: 0.544380 
l0: 0.614979, l1: 0.615237, l2: 0.610104, l3: 0.614038, l4: 0.666980, l5: 0.843358, l6: 1.007596

[epoch:  30/100, batch:   110/  792, ite: 49219] train loss: 5.177849, tar: 0.544438 
l0: 0.397851, l1: 0.404994, l2: 0.406014, l3: 0.410330, l4: 0.453156, l5: 0.568231, l6: 0.646542

[epoch:  30/100, batch:   112/  792, ite: 49220] train loss: 5.176831, tar: 0.544318 
l0: 0.364446, l1: 0.365881, l2: 0.367101, l3: 0.365342, l4: 0.403030, l5: 0.516795, l6: 0.680189

[epoch:  30/100, batch:   114/  792, ite: 49221] train loss: 5.175726, tar: 0.54

[epoch:  30/100, batch:   194/  792, ite: 49261] train loss: 5.175023, tar: 0.544384 
l0: 1.038532, l1: 1.043323, l2: 1.038132, l3: 1.044731, l4: 1.086386, l5: 1.160109, l6: 1.240576

[epoch:  30/100, batch:   196/  792, ite: 49262] train loss: 5.177955, tar: 0.544776 
l0: 0.332919, l1: 0.334856, l2: 0.335790, l3: 0.337714, l4: 0.358265, l5: 0.422465, l6: 0.566538

[epoch:  30/100, batch:   198/  792, ite: 49263] train loss: 5.176583, tar: 0.544608 
l0: 0.556199, l1: 0.553146, l2: 0.568094, l3: 0.589070, l4: 0.582232, l5: 0.632505, l6: 0.717282

[epoch:  30/100, batch:   200/  792, ite: 49264] train loss: 5.176489, tar: 0.544617 
l0: 0.451561, l1: 0.451626, l2: 0.449364, l3: 0.456221, l4: 0.500464, l5: 0.648270, l6: 0.853839

[epoch:  30/100, batch:   202/  792, ite: 49265] train loss: 5.176166, tar: 0.544544 
l0: 0.746250, l1: 0.755137, l2: 0.754095, l3: 0.773450, l4: 0.788344, l5: 0.764242, l6: 1.041081

[epoch:  30/100, batch:   204/  792, ite: 49266] train loss: 5.177447, tar: 0.54

[epoch:  30/100, batch:   284/  792, ite: 49306] train loss: 5.181513, tar: 0.545318 
l0: 0.203405, l1: 0.209834, l2: 0.208809, l3: 0.218973, l4: 0.253150, l5: 0.357420, l6: 0.476691

[epoch:  30/100, batch:   286/  792, ite: 49307] train loss: 5.179419, tar: 0.545057 
l0: 0.266277, l1: 0.271228, l2: 0.276716, l3: 0.276020, l4: 0.290074, l5: 0.421475, l6: 0.488127

[epoch:  30/100, batch:   288/  792, ite: 49308] train loss: 5.177728, tar: 0.544844 
l0: 0.473246, l1: 0.473346, l2: 0.472643, l3: 0.489500, l4: 0.530582, l5: 0.671494, l6: 0.882617

[epoch:  30/100, batch:   290/  792, ite: 49309] train loss: 5.177637, tar: 0.544789 
l0: 0.431906, l1: 0.432206, l2: 0.435565, l3: 0.447196, l4: 0.493130, l5: 0.628508, l6: 0.739792

[epoch:  30/100, batch:   292/  792, ite: 49310] train loss: 5.177062, tar: 0.544703 
l0: 0.668985, l1: 0.672794, l2: 0.672131, l3: 0.678679, l4: 0.665561, l5: 0.642514, l6: 0.744293

[epoch:  30/100, batch:   294/  792, ite: 49311] train loss: 5.177393, tar: 0.54

[epoch:  30/100, batch:   374/  792, ite: 49351] train loss: 5.176116, tar: 0.544620 
l0: 0.331589, l1: 0.332619, l2: 0.339107, l3: 0.347276, l4: 0.358918, l5: 0.482478, l6: 0.694541

[epoch:  30/100, batch:   376/  792, ite: 49352] train loss: 5.174979, tar: 0.544462 
l0: 0.267303, l1: 0.272158, l2: 0.274460, l3: 0.281672, l4: 0.320762, l5: 0.460310, l6: 0.611875

[epoch:  30/100, batch:   378/  792, ite: 49353] train loss: 5.173594, tar: 0.544257 
l0: 0.677413, l1: 0.677986, l2: 0.682972, l3: 0.697477, l4: 0.744254, l5: 0.873327, l6: 1.402788

[epoch:  30/100, batch:   380/  792, ite: 49354] train loss: 5.175141, tar: 0.544356 
l0: 0.466934, l1: 0.467734, l2: 0.465574, l3: 0.478187, l4: 0.494244, l5: 0.614956, l6: 0.761536

[epoch:  30/100, batch:   382/  792, ite: 49355] train loss: 5.174695, tar: 0.544299 
l0: 0.891977, l1: 0.900102, l2: 0.898992, l3: 0.905683, l4: 0.901559, l5: 0.910953, l6: 1.038862

[epoch:  30/100, batch:   384/  792, ite: 49356] train loss: 5.176445, tar: 0.54

[epoch:  30/100, batch:   464/  792, ite: 49396] train loss: 5.182780, tar: 0.545402 
l0: 0.505444, l1: 0.514388, l2: 0.517676, l3: 0.527719, l4: 0.539524, l5: 0.574086, l6: 0.820627

[epoch:  30/100, batch:   466/  792, ite: 49397] train loss: 5.182572, tar: 0.545373 
l0: 0.305347, l1: 0.303441, l2: 0.303795, l3: 0.318057, l4: 0.371869, l5: 0.435488, l6: 0.715612

[epoch:  30/100, batch:   468/  792, ite: 49398] train loss: 5.181386, tar: 0.545201 
l0: 0.724196, l1: 0.729470, l2: 0.731406, l3: 0.738383, l4: 0.794440, l5: 0.898358, l6: 1.018454

[epoch:  30/100, batch:   470/  792, ite: 49399] train loss: 5.182606, tar: 0.545329 
l0: 0.345896, l1: 0.344388, l2: 0.344048, l3: 0.344470, l4: 0.379099, l5: 0.488330, l6: 0.614811

[epoch:  30/100, batch:   472/  792, ite: 49400] train loss: 5.181397, tar: 0.545187 
l0: 0.471734, l1: 0.476984, l2: 0.475438, l3: 0.479584, l4: 0.502511, l5: 0.631354, l6: 0.842142

[epoch:  30/100, batch:   474/  792, ite: 49401] train loss: 5.181128, tar: 0.54

[epoch:  30/100, batch:   554/  792, ite: 49441] train loss: 5.185986, tar: 0.545998 
l0: 0.250646, l1: 0.251044, l2: 0.250400, l3: 0.262727, l4: 0.299826, l5: 0.398163, l6: 0.512391

[epoch:  30/100, batch:   556/  792, ite: 49442] train loss: 5.184337, tar: 0.545793 
l0: 0.655897, l1: 0.658442, l2: 0.649953, l3: 0.647141, l4: 0.657745, l5: 0.689561, l6: 0.803163

[epoch:  30/100, batch:   558/  792, ite: 49443] train loss: 5.184633, tar: 0.545869 
l0: 0.369710, l1: 0.374824, l2: 0.375710, l3: 0.373649, l4: 0.417051, l5: 0.501357, l6: 0.646899

[epoch:  30/100, batch:   560/  792, ite: 49444] train loss: 5.183634, tar: 0.545747 
l0: 0.559162, l1: 0.564553, l2: 0.565150, l3: 0.581852, l4: 0.644517, l5: 0.753211, l6: 0.936909

[epoch:  30/100, batch:   562/  792, ite: 49445] train loss: 5.183899, tar: 0.545757 
l0: 0.459172, l1: 0.457557, l2: 0.453063, l3: 0.446116, l4: 0.481476, l5: 0.536718, l6: 0.679389

[epoch:  30/100, batch:   564/  792, ite: 49446] train loss: 5.183268, tar: 0.54

[epoch:  30/100, batch:   644/  792, ite: 49486] train loss: 5.177750, tar: 0.545113 
l0: 0.349775, l1: 0.352515, l2: 0.351578, l3: 0.347394, l4: 0.365004, l5: 0.466915, l6: 0.585441

[epoch:  30/100, batch:   646/  792, ite: 49487] train loss: 5.176543, tar: 0.544981 
l0: 0.778968, l1: 0.775191, l2: 0.774325, l3: 0.782194, l4: 0.827151, l5: 0.932006, l6: 1.116844

[epoch:  30/100, batch:   648/  792, ite: 49488] train loss: 5.177842, tar: 0.545139 
l0: 0.427428, l1: 0.426548, l2: 0.425979, l3: 0.431855, l4: 0.455399, l5: 0.559554, l6: 0.738885

[epoch:  30/100, batch:   650/  792, ite: 49489] train loss: 5.177238, tar: 0.545060 
l0: 0.328195, l1: 0.334226, l2: 0.339644, l3: 0.340921, l4: 0.324463, l5: 0.375146, l6: 0.482930

[epoch:  30/100, batch:   652/  792, ite: 49490] train loss: 5.175845, tar: 0.544914 
l0: 0.404290, l1: 0.412222, l2: 0.410349, l3: 0.418349, l4: 0.453944, l5: 0.654251, l6: 0.892339

[epoch:  30/100, batch:   654/  792, ite: 49491] train loss: 5.175423, tar: 0.54

[epoch:  30/100, batch:   734/  792, ite: 49531] train loss: 5.176680, tar: 0.544959 
l0: 0.535492, l1: 0.540441, l2: 0.537001, l3: 0.531421, l4: 0.539863, l5: 0.613867, l6: 1.007569

[epoch:  30/100, batch:   736/  792, ite: 49532] train loss: 5.176792, tar: 0.544953 
l0: 0.769149, l1: 0.776626, l2: 0.780640, l3: 0.793824, l4: 0.827388, l5: 0.896860, l6: 1.050869

[epoch:  30/100, batch:   738/  792, ite: 49533] train loss: 5.177963, tar: 0.545099 
l0: 2.300050, l1: 2.272493, l2: 2.296612, l3: 2.322086, l4: 2.447377, l5: 2.484445, l6: 2.842811

[epoch:  30/100, batch:   740/  792, ite: 49534] train loss: 5.187244, tar: 0.546243 
l0: 0.858207, l1: 0.860792, l2: 0.847142, l3: 0.856091, l4: 0.870438, l5: 0.960755, l6: 1.088612

[epoch:  30/100, batch:   742/  792, ite: 49535] train loss: 5.188903, tar: 0.546447 
l0: 0.395004, l1: 0.394477, l2: 0.391966, l3: 0.407369, l4: 0.422666, l5: 0.496235, l6: 0.646402

[epoch:  30/100, batch:   744/  792, ite: 49536] train loss: 5.188008, tar: 0.54

l0: 0.299156, l1: 0.299300, l2: 0.298975, l3: 0.295595, l4: 0.317372, l5: 0.410394, l6: 0.525156

[epoch:  31/100, batch:    32/  792, ite: 49576] train loss: 5.187846, tar: 0.546492 
l0: 0.936922, l1: 0.944313, l2: 0.941745, l3: 0.941700, l4: 0.949449, l5: 0.920344, l6: 1.052822

[epoch:  31/100, batch:    34/  792, ite: 49577] train loss: 5.189468, tar: 0.546739 
l0: 0.795466, l1: 0.792338, l2: 0.794752, l3: 0.793561, l4: 0.808056, l5: 0.930578, l6: 1.028709

[epoch:  31/100, batch:    36/  792, ite: 49578] train loss: 5.190584, tar: 0.546897 
l0: 0.746246, l1: 0.745318, l2: 0.749217, l3: 0.760067, l4: 0.802266, l5: 0.890491, l6: 1.131747

[epoch:  31/100, batch:    38/  792, ite: 49579] train loss: 5.191757, tar: 0.547023 
l0: 0.627341, l1: 0.627120, l2: 0.630015, l3: 0.638291, l4: 0.671724, l5: 0.739091, l6: 1.036015

[epoch:  31/100, batch:    40/  792, ite: 49580] train loss: 5.192474, tar: 0.547074 
l0: 0.297144, l1: 0.300966, l2: 0.300253, l3: 0.315819, l4: 0.367532, l5: 0.5512

[epoch:  31/100, batch:   120/  792, ite: 49620] train loss: 5.184092, tar: 0.545655 
l0: 0.766777, l1: 0.765104, l2: 0.765339, l3: 0.761990, l4: 0.784326, l5: 0.915579, l6: 1.150744

[epoch:  31/100, batch:   122/  792, ite: 49621] train loss: 5.185303, tar: 0.545791 
l0: 0.332356, l1: 0.338966, l2: 0.337036, l3: 0.341196, l4: 0.349346, l5: 0.497310, l6: 0.637396

[epoch:  31/100, batch:   124/  792, ite: 49622] train loss: 5.184224, tar: 0.545660 
l0: 0.513873, l1: 0.522446, l2: 0.519729, l3: 0.519761, l4: 0.538349, l5: 0.577886, l6: 0.784308

[epoch:  31/100, batch:   126/  792, ite: 49623] train loss: 5.183986, tar: 0.545640 
l0: 0.482860, l1: 0.481470, l2: 0.483624, l3: 0.489242, l4: 0.507053, l5: 0.639667, l6: 0.868164

[epoch:  31/100, batch:   128/  792, ite: 49624] train loss: 5.183799, tar: 0.545601 
l0: 0.384205, l1: 0.383844, l2: 0.387296, l3: 0.396760, l4: 0.420845, l5: 0.491789, l6: 0.687320

[epoch:  31/100, batch:   130/  792, ite: 49625] train loss: 5.183006, tar: 0.54

[epoch:  31/100, batch:   210/  792, ite: 49665] train loss: 5.188207, tar: 0.546125 
l0: 0.385247, l1: 0.393696, l2: 0.391519, l3: 0.393080, l4: 0.408279, l5: 0.514625, l6: 0.625820

[epoch:  31/100, batch:   212/  792, ite: 49666] train loss: 5.187411, tar: 0.546028 
l0: 0.583628, l1: 0.583686, l2: 0.585724, l3: 0.594765, l4: 0.594633, l5: 0.680926, l6: 0.823488

[epoch:  31/100, batch:   214/  792, ite: 49667] train loss: 5.187508, tar: 0.546051 
l0: 0.537373, l1: 0.543414, l2: 0.546572, l3: 0.538921, l4: 0.550518, l5: 0.666509, l6: 0.879843

[epoch:  31/100, batch:   216/  792, ite: 49668] train loss: 5.187552, tar: 0.546046 
l0: 0.396222, l1: 0.396661, l2: 0.393429, l3: 0.389808, l4: 0.452967, l5: 0.637270, l6: 0.879480

[epoch:  31/100, batch:   218/  792, ite: 49669] train loss: 5.187172, tar: 0.545956 
l0: 0.361251, l1: 0.364359, l2: 0.362751, l3: 0.369256, l4: 0.410453, l5: 0.472563, l6: 0.550408

[epoch:  31/100, batch:   220/  792, ite: 49670] train loss: 5.186150, tar: 0.54

[epoch:  31/100, batch:   300/  792, ite: 49710] train loss: 5.176340, tar: 0.544482 
l0: 0.308344, l1: 0.306633, l2: 0.300190, l3: 0.292777, l4: 0.309432, l5: 0.389552, l6: 0.460558

[epoch:  31/100, batch:   302/  792, ite: 49711] train loss: 5.174985, tar: 0.544344 
l0: 1.228997, l1: 1.238007, l2: 1.250960, l3: 1.286269, l4: 1.305273, l5: 1.237231, l6: 1.140303

[epoch:  31/100, batch:   304/  792, ite: 49712] train loss: 5.177699, tar: 0.544744 
l0: 0.617572, l1: 0.624887, l2: 0.621655, l3: 0.632254, l4: 0.622146, l5: 0.710226, l6: 0.772173

[epoch:  31/100, batch:   306/  792, ite: 49713] train loss: 5.177820, tar: 0.544786 
l0: 0.582622, l1: 0.587537, l2: 0.594906, l3: 0.593132, l4: 0.606121, l5: 0.753119, l6: 0.958600

[epoch:  31/100, batch:   308/  792, ite: 49714] train loss: 5.178194, tar: 0.544808 
l0: 0.446645, l1: 0.450052, l2: 0.454477, l3: 0.471248, l4: 0.481065, l5: 0.518896, l6: 0.634500

[epoch:  31/100, batch:   310/  792, ite: 49715] train loss: 5.177629, tar: 0.54

[epoch:  31/100, batch:   390/  792, ite: 49755] train loss: 5.175985, tar: 0.544609 
l0: 0.693107, l1: 0.693676, l2: 0.696262, l3: 0.710886, l4: 0.710634, l5: 0.818132, l6: 0.907436

[epoch:  31/100, batch:   392/  792, ite: 49756] train loss: 5.176561, tar: 0.544693 
l0: 0.596279, l1: 0.607104, l2: 0.603443, l3: 0.616145, l4: 0.641743, l5: 0.894186, l6: 1.067760

[epoch:  31/100, batch:   394/  792, ite: 49757] train loss: 5.177075, tar: 0.544723 
l0: 0.523963, l1: 0.530560, l2: 0.533353, l3: 0.525046, l4: 0.549173, l5: 0.653579, l6: 1.032808

[epoch:  31/100, batch:   396/  792, ite: 49758] train loss: 5.177191, tar: 0.544711 
l0: 0.505695, l1: 0.507297, l2: 0.508665, l3: 0.509125, l4: 0.511103, l5: 0.583278, l6: 0.711320

[epoch:  31/100, batch:   398/  792, ite: 49759] train loss: 5.176901, tar: 0.544689 
l0: 0.291775, l1: 0.288586, l2: 0.288369, l3: 0.292256, l4: 0.338396, l5: 0.409497, l6: 0.490968

[epoch:  31/100, batch:   400/  792, ite: 49760] train loss: 5.175619, tar: 0.54

[epoch:  31/100, batch:   480/  792, ite: 49800] train loss: 5.171990, tar: 0.544110 
l0: 0.385952, l1: 0.387879, l2: 0.382540, l3: 0.399212, l4: 0.440707, l5: 0.594244, l6: 0.854878

[epoch:  31/100, batch:   482/  792, ite: 49801] train loss: 5.171568, tar: 0.544022 
l0: 0.730728, l1: 0.743332, l2: 0.733652, l3: 0.748535, l4: 0.746075, l5: 0.796442, l6: 0.845251

[epoch:  31/100, batch:   484/  792, ite: 49802] train loss: 5.172133, tar: 0.544126 
l0: 0.601958, l1: 0.601813, l2: 0.596209, l3: 0.599811, l4: 0.642410, l5: 0.728474, l6: 1.567568

[epoch:  31/100, batch:   486/  792, ite: 49803] train loss: 5.172981, tar: 0.544158 
l0: 0.997863, l1: 1.009901, l2: 1.012065, l3: 1.014012, l4: 1.079766, l5: 1.178905, l6: 1.265906

[epoch:  31/100, batch:   488/  792, ite: 49804] train loss: 5.175021, tar: 0.544409 
l0: 0.457897, l1: 0.460823, l2: 0.462602, l3: 0.462205, l4: 0.480042, l5: 0.678015, l6: 0.951808

[epoch:  31/100, batch:   490/  792, ite: 49805] train loss: 5.174903, tar: 0.54

[epoch:  31/100, batch:   570/  792, ite: 49845] train loss: 5.168516, tar: 0.543372 
l0: 0.833104, l1: 0.845498, l2: 0.831947, l3: 0.826032, l4: 0.825626, l5: 0.896743, l6: 0.990727

[epoch:  31/100, batch:   572/  792, ite: 49846] train loss: 5.169545, tar: 0.543529 
l0: 0.522637, l1: 0.525781, l2: 0.521628, l3: 0.528941, l4: 0.565204, l5: 0.621329, l6: 0.882295

[epoch:  31/100, batch:   574/  792, ite: 49847] train loss: 5.169536, tar: 0.543518 
l0: 0.513307, l1: 0.519731, l2: 0.524892, l3: 0.530129, l4: 0.576181, l5: 0.704363, l6: 1.055564

[epoch:  31/100, batch:   576/  792, ite: 49848] train loss: 5.169716, tar: 0.543501 
l0: 0.813792, l1: 0.814439, l2: 0.811551, l3: 0.820867, l4: 0.872118, l5: 0.918306, l6: 1.148689

[epoch:  31/100, batch:   578/  792, ite: 49849] train loss: 5.170925, tar: 0.543648 
l0: 0.652032, l1: 0.658307, l2: 0.661035, l3: 0.681413, l4: 0.694074, l5: 0.878582, l6: 1.122968

[epoch:  31/100, batch:   580/  792, ite: 49850] train loss: 5.171602, tar: 0.54

[epoch:  31/100, batch:   660/  792, ite: 49890] train loss: 5.176967, tar: 0.544435 
l0: 0.398023, l1: 0.398337, l2: 0.399924, l3: 0.412767, l4: 0.425309, l5: 0.516227, l6: 0.647776

[epoch:  31/100, batch:   662/  792, ite: 49891] train loss: 5.176296, tar: 0.544358 
l0: 0.588455, l1: 0.585018, l2: 0.583216, l3: 0.574685, l4: 0.554824, l5: 0.667296, l6: 0.874956

[epoch:  31/100, batch:   664/  792, ite: 49892] train loss: 5.176442, tar: 0.544381 
l0: 0.683932, l1: 0.682562, l2: 0.692939, l3: 0.704939, l4: 0.700685, l5: 0.787484, l6: 0.907060

[epoch:  31/100, batch:   666/  792, ite: 49893] train loss: 5.176961, tar: 0.544455 
l0: 0.383471, l1: 0.385136, l2: 0.385993, l3: 0.396119, l4: 0.431500, l5: 0.632972, l6: 0.726835

[epoch:  31/100, batch:   668/  792, ite: 49894] train loss: 5.176365, tar: 0.544370 
l0: 0.409020, l1: 0.404698, l2: 0.406525, l3: 0.406259, l4: 0.403447, l5: 0.444635, l6: 0.550683

[epoch:  31/100, batch:   670/  792, ite: 49895] train loss: 5.175572, tar: 0.54

[epoch:  31/100, batch:   750/  792, ite: 49935] train loss: 5.167686, tar: 0.543450 
l0: 0.569332, l1: 0.577713, l2: 0.574588, l3: 0.569303, l4: 0.607592, l5: 0.716212, l6: 0.919508

[epoch:  31/100, batch:   752/  792, ite: 49936] train loss: 5.167894, tar: 0.543464 
l0: 0.389709, l1: 0.387687, l2: 0.385888, l3: 0.398197, l4: 0.428820, l5: 0.522880, l6: 0.606191

[epoch:  31/100, batch:   754/  792, ite: 49937] train loss: 5.167200, tar: 0.543384 
l0: 0.545137, l1: 0.550313, l2: 0.551304, l3: 0.555485, l4: 0.587135, l5: 0.730934, l6: 0.843022

[epoch:  31/100, batch:   756/  792, ite: 49938] train loss: 5.167260, tar: 0.543385 
l0: 0.285958, l1: 0.288204, l2: 0.289218, l3: 0.292274, l4: 0.307136, l5: 0.398259, l6: 0.507534

[epoch:  31/100, batch:   758/  792, ite: 49939] train loss: 5.166120, tar: 0.543252 
l0: 0.439125, l1: 0.442670, l2: 0.443413, l3: 0.440483, l4: 0.460810, l5: 0.562311, l6: 0.642063

[epoch:  31/100, batch:   760/  792, ite: 49940] train loss: 5.165633, tar: 0.54

l0: 0.555211, l1: 0.561200, l2: 0.558319, l3: 0.568362, l4: 0.597964, l5: 0.633977, l6: 0.805714

[epoch:  32/100, batch:    48/  792, ite: 49980] train loss: 5.167225, tar: 0.543363 
l0: 0.309174, l1: 0.311058, l2: 0.304662, l3: 0.305106, l4: 0.344685, l5: 0.409870, l6: 0.561030

[epoch:  32/100, batch:    50/  792, ite: 49981] train loss: 5.166205, tar: 0.543244 
l0: 0.979182, l1: 0.985628, l2: 0.993220, l3: 0.995817, l4: 1.013098, l5: 1.084181, l6: 1.252361

[epoch:  32/100, batch:    52/  792, ite: 49982] train loss: 5.167959, tar: 0.543464 
l0: 0.234898, l1: 0.235552, l2: 0.233457, l3: 0.239598, l4: 0.259146, l5: 0.437186, l6: 0.502989

[epoch:  32/100, batch:    54/  792, ite: 49983] train loss: 5.166706, tar: 0.543309 
l0: 0.266915, l1: 0.267534, l2: 0.267068, l3: 0.268243, l4: 0.325275, l5: 0.448935, l6: 0.533446

[epoch:  32/100, batch:    56/  792, ite: 49984] train loss: 5.165593, tar: 0.543169 
l0: 0.307168, l1: 0.307676, l2: 0.305384, l3: 0.308675, l4: 0.324648, l5: 0.4625

[epoch:  32/100, batch:   136/  792, ite: 50024] train loss: 4.925864, tar: 0.527111 
l0: 0.681314, l1: 0.690656, l2: 0.677801, l3: 0.682700, l4: 0.678903, l5: 0.762051, l6: 0.832982

[epoch:  32/100, batch:   138/  792, ite: 50025] train loss: 4.962513, tar: 0.533279 
l0: 0.545258, l1: 0.544254, l2: 0.540599, l3: 0.550630, l4: 0.564690, l5: 0.738547, l6: 0.978892

[epoch:  32/100, batch:   140/  792, ite: 50026] train loss: 4.983449, tar: 0.533740 
l0: 0.422217, l1: 0.432536, l2: 0.430573, l3: 0.423676, l4: 0.459917, l5: 0.567779, l6: 0.785475

[epoch:  32/100, batch:   142/  792, ite: 50027] train loss: 4.962146, tar: 0.529609 
l0: 0.578331, l1: 0.584303, l2: 0.583512, l3: 0.601132, l4: 0.666816, l5: 0.863650, l6: 1.194654

[epoch:  32/100, batch:   144/  792, ite: 50028] train loss: 5.000542, tar: 0.531349 
l0: 0.259635, l1: 0.262890, l2: 0.264442, l3: 0.262362, l4: 0.287299, l5: 0.368295, l6: 0.478217

[epoch:  32/100, batch:   146/  792, ite: 50029] train loss: 4.921380, tar: 0.52

[epoch:  32/100, batch:   226/  792, ite: 50069] train loss: 4.947094, tar: 0.516468 
l0: 0.503369, l1: 0.507874, l2: 0.508348, l3: 0.506561, l4: 0.522800, l5: 0.629725, l6: 0.816849

[epoch:  32/100, batch:   228/  792, ite: 50070] train loss: 4.947319, tar: 0.516281 
l0: 0.469001, l1: 0.472512, l2: 0.472248, l3: 0.484404, l4: 0.497823, l5: 0.593898, l6: 0.739106

[epoch:  32/100, batch:   230/  792, ite: 50071] train loss: 4.941559, tar: 0.515615 
l0: 0.697607, l1: 0.697526, l2: 0.701899, l3: 0.707335, l4: 0.699865, l5: 0.758875, l6: 0.782859

[epoch:  32/100, batch:   232/  792, ite: 50072] train loss: 4.954563, tar: 0.518143 
l0: 0.482406, l1: 0.482673, l2: 0.484402, l3: 0.495769, l4: 0.499012, l5: 0.629736, l6: 0.737678

[epoch:  32/100, batch:   234/  792, ite: 50073] train loss: 4.949451, tar: 0.517653 
l0: 0.452878, l1: 0.455104, l2: 0.455768, l3: 0.461572, l4: 0.499418, l5: 0.530468, l6: 0.605915

[epoch:  32/100, batch:   236/  792, ite: 50074] train loss: 4.938264, tar: 0.51

[epoch:  32/100, batch:   316/  792, ite: 50114] train loss: 5.039157, tar: 0.530379 
l0: 0.570128, l1: 0.579682, l2: 0.577481, l3: 0.582042, l4: 0.604239, l5: 0.726575, l6: 0.929032

[epoch:  32/100, batch:   318/  792, ite: 50115] train loss: 5.044182, tar: 0.530725 
l0: 0.887748, l1: 0.892560, l2: 0.887292, l3: 0.868371, l4: 0.860520, l5: 0.933617, l6: 1.010697

[epoch:  32/100, batch:   320/  792, ite: 50116] train loss: 5.066573, tar: 0.533803 
l0: 0.408062, l1: 0.413041, l2: 0.415339, l3: 0.412094, l4: 0.427515, l5: 0.464007, l6: 0.608869

[epoch:  32/100, batch:   322/  792, ite: 50117] train loss: 5.055527, tar: 0.532728 
l0: 0.272832, l1: 0.278525, l2: 0.280115, l3: 0.281369, l4: 0.325614, l5: 0.420861, l6: 0.570860

[epoch:  32/100, batch:   324/  792, ite: 50118] train loss: 5.038300, tar: 0.530525 
l0: 0.367952, l1: 0.369016, l2: 0.368431, l3: 0.370412, l4: 0.384442, l5: 0.478855, l6: 0.547857

[epoch:  32/100, batch:   326/  792, ite: 50119] train loss: 5.025586, tar: 0.52

[epoch:  32/100, batch:   406/  792, ite: 50159] train loss: 5.160844, tar: 0.546723 
l0: 0.435111, l1: 0.444081, l2: 0.440877, l3: 0.452061, l4: 0.487398, l5: 0.562966, l6: 0.766994

[epoch:  32/100, batch:   408/  792, ite: 50160] train loss: 5.156014, tar: 0.546025 
l0: 0.457382, l1: 0.457955, l2: 0.454059, l3: 0.449098, l4: 0.488303, l5: 0.581527, l6: 0.676049

[epoch:  32/100, batch:   410/  792, ite: 50161] train loss: 5.151084, tar: 0.545475 
l0: 0.344918, l1: 0.349317, l2: 0.343300, l3: 0.348772, l4: 0.345875, l5: 0.475173, l6: 0.506004

[epoch:  32/100, batch:   412/  792, ite: 50162] train loss: 5.139585, tar: 0.544237 
l0: 0.463434, l1: 0.459509, l2: 0.461752, l3: 0.469553, l4: 0.487281, l5: 0.550141, l6: 0.709209

[epoch:  32/100, batch:   414/  792, ite: 50163] train loss: 5.134401, tar: 0.543741 
l0: 0.607109, l1: 0.612538, l2: 0.615518, l3: 0.616788, l4: 0.644972, l5: 0.792868, l6: 0.882698

[epoch:  32/100, batch:   416/  792, ite: 50164] train loss: 5.137228, tar: 0.54

[epoch:  32/100, batch:   496/  792, ite: 50204] train loss: 5.142895, tar: 0.545580 
l0: 0.354284, l1: 0.354598, l2: 0.355804, l3: 0.356724, l4: 0.399125, l5: 0.478721, l6: 0.778217

[epoch:  32/100, batch:   498/  792, ite: 50205] train loss: 5.136847, tar: 0.544647 
l0: 0.297857, l1: 0.299028, l2: 0.296496, l3: 0.301470, l4: 0.307599, l5: 0.353073, l6: 0.467862

[epoch:  32/100, batch:   500/  792, ite: 50206] train loss: 5.125768, tar: 0.543449 
l0: 0.826277, l1: 0.832411, l2: 0.830742, l3: 0.825731, l4: 0.874729, l5: 0.852924, l6: 0.935652

[epoch:  32/100, batch:   502/  792, ite: 50207] train loss: 5.134541, tar: 0.544815 
l0: 0.606007, l1: 0.610935, l2: 0.609022, l3: 0.613349, l4: 0.608505, l5: 0.595458, l6: 0.785123

[epoch:  32/100, batch:   504/  792, ite: 50208] train loss: 5.135431, tar: 0.545109 
l0: 0.474331, l1: 0.471589, l2: 0.475621, l3: 0.481160, l4: 0.484177, l5: 0.578926, l6: 0.741272

[epoch:  32/100, batch:   506/  792, ite: 50209] train loss: 5.132086, tar: 0.54

[epoch:  32/100, batch:   586/  792, ite: 50249] train loss: 5.081182, tar: 0.537028 
l0: 0.303097, l1: 0.307846, l2: 0.311457, l3: 0.329785, l4: 0.358766, l5: 0.483412, l6: 0.620311

[epoch:  32/100, batch:   588/  792, ite: 50250] train loss: 5.074464, tar: 0.536093 
l0: 0.334179, l1: 0.332451, l2: 0.333995, l3: 0.345019, l4: 0.394621, l5: 0.486893, l6: 0.555310

[epoch:  32/100, batch:   590/  792, ite: 50251] train loss: 5.067743, tar: 0.535288 
l0: 0.714146, l1: 0.713088, l2: 0.712580, l3: 0.718539, l4: 0.729807, l5: 0.758014, l6: 0.728516

[epoch:  32/100, batch:   592/  792, ite: 50252] train loss: 5.070830, tar: 0.535998 
l0: 0.811298, l1: 0.810902, l2: 0.810862, l3: 0.797755, l4: 0.817614, l5: 0.902122, l6: 1.085459

[epoch:  32/100, batch:   594/  792, ite: 50253] train loss: 5.078920, tar: 0.537086 
l0: 0.468016, l1: 0.469325, l2: 0.470291, l3: 0.469622, l4: 0.489730, l5: 0.563018, l6: 0.687262

[epoch:  32/100, batch:   596/  792, ite: 50254] train loss: 5.076354, tar: 0.53

[epoch:  32/100, batch:   676/  792, ite: 50294] train loss: 5.078066, tar: 0.537340 
l0: 0.396613, l1: 0.400241, l2: 0.401253, l3: 0.412656, l4: 0.421975, l5: 0.551193, l6: 0.676247

[epoch:  32/100, batch:   678/  792, ite: 50295] train loss: 5.074358, tar: 0.536863 
l0: 0.576695, l1: 0.579487, l2: 0.580898, l3: 0.583466, l4: 0.610311, l5: 0.755990, l6: 0.904417

[epoch:  32/100, batch:   680/  792, ite: 50296] train loss: 5.076094, tar: 0.536997 
l0: 0.458122, l1: 0.459627, l2: 0.461006, l3: 0.465799, l4: 0.487864, l5: 0.527480, l6: 0.638512

[epoch:  32/100, batch:   682/  792, ite: 50297] train loss: 5.073213, tar: 0.536732 
l0: 0.274235, l1: 0.284815, l2: 0.279963, l3: 0.290476, l4: 0.300296, l5: 0.435075, l6: 0.507771

[epoch:  32/100, batch:   684/  792, ite: 50298] train loss: 5.066518, tar: 0.535851 
l0: 0.371710, l1: 0.385877, l2: 0.384296, l3: 0.389482, l4: 0.409013, l5: 0.518012, l6: 0.623791

[epoch:  32/100, batch:   686/  792, ite: 50299] train loss: 5.062021, tar: 0.53

[epoch:  32/100, batch:   766/  792, ite: 50339] train loss: 5.077890, tar: 0.535658 
l0: 0.910624, l1: 0.918438, l2: 0.920635, l3: 0.922620, l4: 0.962515, l5: 1.125488, l6: 1.439810

[epoch:  32/100, batch:   768/  792, ite: 50340] train loss: 5.088464, tar: 0.536761 
l0: 0.873772, l1: 0.895258, l2: 0.890402, l3: 0.890541, l4: 0.886284, l5: 0.912901, l6: 0.917147

[epoch:  32/100, batch:   770/  792, ite: 50341] train loss: 5.094693, tar: 0.537749 
l0: 0.507123, l1: 0.509427, l2: 0.509778, l3: 0.513676, l4: 0.532796, l5: 0.545624, l6: 0.727487

[epoch:  32/100, batch:   772/  792, ite: 50342] train loss: 5.093289, tar: 0.537660 
l0: 0.416630, l1: 0.420019, l2: 0.418474, l3: 0.422081, l4: 0.464628, l5: 0.604237, l6: 0.650639

[epoch:  32/100, batch:   774/  792, ite: 50343] train loss: 5.090439, tar: 0.537307 
l0: 0.742692, l1: 0.748627, l2: 0.747667, l3: 0.746115, l4: 0.773752, l5: 0.871562, l6: 1.024801

[epoch:  32/100, batch:   776/  792, ite: 50344] train loss: 5.095049, tar: 0.53

l0: 0.293586, l1: 0.299534, l2: 0.297410, l3: 0.304437, l4: 0.320331, l5: 0.423895, l6: 0.635738

[epoch:  33/100, batch:    64/  792, ite: 50384] train loss: 5.103727, tar: 0.536506 
l0: 0.756921, l1: 0.763654, l2: 0.760250, l3: 0.756350, l4: 0.771513, l5: 0.837912, l6: 0.931045

[epoch:  33/100, batch:    66/  792, ite: 50385] train loss: 5.107690, tar: 0.537079 
l0: 0.354861, l1: 0.358227, l2: 0.357554, l3: 0.366868, l4: 0.379814, l5: 0.424434, l6: 0.589781

[epoch:  33/100, batch:    68/  792, ite: 50386] train loss: 5.103505, tar: 0.536607 
l0: 0.564648, l1: 0.577331, l2: 0.572527, l3: 0.566696, l4: 0.570632, l5: 0.585402, l6: 0.702709

[epoch:  33/100, batch:    70/  792, ite: 50387] train loss: 5.103382, tar: 0.536679 
l0: 0.274294, l1: 0.265858, l2: 0.268808, l3: 0.279190, l4: 0.317948, l5: 0.419938, l6: 0.548023

[epoch:  33/100, batch:    72/  792, ite: 50388] train loss: 5.097818, tar: 0.536003 
l0: 0.632048, l1: 0.646922, l2: 0.644230, l3: 0.634084, l4: 0.645703, l5: 0.7285

[epoch:  33/100, batch:   152/  792, ite: 50428] train loss: 5.104089, tar: 0.536914 
l0: 0.656822, l1: 0.668412, l2: 0.666039, l3: 0.667803, l4: 0.662651, l5: 0.711360, l6: 0.648489

[epoch:  33/100, batch:   154/  792, ite: 50429] train loss: 5.104938, tar: 0.537194 
l0: 0.283087, l1: 0.284281, l2: 0.281156, l3: 0.287830, l4: 0.313248, l5: 0.413516, l6: 0.491544

[epoch:  33/100, batch:   156/  792, ite: 50430] train loss: 5.099694, tar: 0.536603 
l0: 0.342493, l1: 0.342186, l2: 0.339297, l3: 0.343288, l4: 0.375806, l5: 0.426700, l6: 0.535735

[epoch:  33/100, batch:   158/  792, ite: 50431] train loss: 5.095584, tar: 0.536152 
l0: 0.514798, l1: 0.515039, l2: 0.511502, l3: 0.502288, l4: 0.532573, l5: 0.652024, l6: 0.917238

[epoch:  33/100, batch:   160/  792, ite: 50432] train loss: 5.095822, tar: 0.536103 
l0: 0.606108, l1: 0.610281, l2: 0.613615, l3: 0.628561, l4: 0.602322, l5: 0.582356, l6: 0.636955

[epoch:  33/100, batch:   162/  792, ite: 50433] train loss: 5.095520, tar: 0.53

[epoch:  33/100, batch:   242/  792, ite: 50473] train loss: 5.065159, tar: 0.532467 
l0: 0.772804, l1: 0.781280, l2: 0.783107, l3: 0.792984, l4: 0.803128, l5: 0.869750, l6: 1.265357

[epoch:  33/100, batch:   244/  792, ite: 50474] train loss: 5.070126, tar: 0.532974 
l0: 0.416547, l1: 0.422975, l2: 0.422853, l3: 0.429152, l4: 0.448936, l5: 0.579434, l6: 0.663084

[epoch:  33/100, batch:   246/  792, ite: 50475] train loss: 5.068379, tar: 0.532729 
l0: 0.673959, l1: 0.680022, l2: 0.682219, l3: 0.667152, l4: 0.682947, l5: 0.716877, l6: 0.824268

[epoch:  33/100, batch:   248/  792, ite: 50476] train loss: 5.070161, tar: 0.533026 
l0: 0.534532, l1: 0.531448, l2: 0.530929, l3: 0.530677, l4: 0.551213, l5: 0.584599, l6: 0.717039

[epoch:  33/100, batch:   250/  792, ite: 50477] train loss: 5.069377, tar: 0.533029 
l0: 0.619871, l1: 0.624302, l2: 0.627527, l3: 0.638854, l4: 0.653666, l5: 0.760758, l6: 0.955682

[epoch:  33/100, batch:   252/  792, ite: 50478] train loss: 5.071036, tar: 0.53

[epoch:  33/100, batch:   332/  792, ite: 50518] train loss: 5.093984, tar: 0.536319 
l0: 0.346711, l1: 0.348430, l2: 0.351583, l3: 0.359516, l4: 0.419695, l5: 0.539940, l6: 0.813706

[epoch:  33/100, batch:   334/  792, ite: 50519] train loss: 5.091947, tar: 0.535954 
l0: 0.334577, l1: 0.339321, l2: 0.339746, l3: 0.341017, l4: 0.376353, l5: 0.527987, l6: 0.715262

[epoch:  33/100, batch:   336/  792, ite: 50520] train loss: 5.089254, tar: 0.535567 
l0: 0.537444, l1: 0.544281, l2: 0.544406, l3: 0.543338, l4: 0.548327, l5: 0.628551, l6: 0.764074

[epoch:  33/100, batch:   338/  792, ite: 50521] train loss: 5.088948, tar: 0.535570 
l0: 0.312427, l1: 0.313765, l2: 0.319531, l3: 0.327691, l4: 0.367383, l5: 0.530198, l6: 0.733093

[epoch:  33/100, batch:   340/  792, ite: 50522] train loss: 5.086256, tar: 0.535143 
l0: 0.302086, l1: 0.304030, l2: 0.303464, l3: 0.310913, l4: 0.333761, l5: 0.401195, l6: 0.540448

[epoch:  33/100, batch:   342/  792, ite: 50523] train loss: 5.082521, tar: 0.53

[epoch:  33/100, batch:   422/  792, ite: 50563] train loss: 5.084503, tar: 0.534361 
l0: 1.067006, l1: 1.080962, l2: 1.075839, l3: 1.078234, l4: 1.133467, l5: 1.286792, l6: 1.317274

[epoch:  33/100, batch:   424/  792, ite: 50564] train loss: 5.092344, tar: 0.535305 
l0: 0.516400, l1: 0.523516, l2: 0.521329, l3: 0.528709, l4: 0.530734, l5: 0.589631, l6: 0.848117

[epoch:  33/100, batch:   426/  792, ite: 50565] train loss: 5.092348, tar: 0.535272 
l0: 0.629490, l1: 0.630315, l2: 0.629283, l3: 0.632884, l4: 0.662782, l5: 0.725744, l6: 0.856184

[epoch:  33/100, batch:   428/  792, ite: 50566] train loss: 5.093321, tar: 0.535438 
l0: 0.416738, l1: 0.421960, l2: 0.425023, l3: 0.431603, l4: 0.474360, l5: 0.576431, l6: 0.805392

[epoch:  33/100, batch:   430/  792, ite: 50567] train loss: 5.092301, tar: 0.535229 
l0: 0.788744, l1: 0.782539, l2: 0.784620, l3: 0.781466, l4: 0.785596, l5: 0.910986, l6: 1.003203

[epoch:  33/100, batch:   432/  792, ite: 50568] train loss: 5.095589, tar: 0.53

[epoch:  33/100, batch:   512/  792, ite: 50608] train loss: 5.058120, tar: 0.530700 
l0: 0.446567, l1: 0.453820, l2: 0.452424, l3: 0.445375, l4: 0.466942, l5: 0.590708, l6: 0.767512

[epoch:  33/100, batch:   514/  792, ite: 50609] train loss: 5.057169, tar: 0.530562 
l0: 0.699688, l1: 0.705530, l2: 0.705946, l3: 0.700961, l4: 0.742909, l5: 0.843510, l6: 0.950377

[epoch:  33/100, batch:   516/  792, ite: 50610] train loss: 5.059455, tar: 0.530839 
l0: 0.373788, l1: 0.378610, l2: 0.375491, l3: 0.378082, l4: 0.394387, l5: 0.497247, l6: 0.682595

[epoch:  33/100, batch:   518/  792, ite: 50611] train loss: 5.057464, tar: 0.530582 
l0: 0.427552, l1: 0.435221, l2: 0.435026, l3: 0.439786, l4: 0.456032, l5: 0.538509, l6: 0.608072

[epoch:  33/100, batch:   520/  792, ite: 50612] train loss: 5.055835, tar: 0.530414 
l0: 0.521796, l1: 0.524793, l2: 0.524364, l3: 0.524927, l4: 0.538135, l5: 0.624047, l6: 0.711398

[epoch:  33/100, batch:   522/  792, ite: 50613] train loss: 5.055315, tar: 0.53

[epoch:  33/100, batch:   602/  792, ite: 50653] train loss: 5.103909, tar: 0.536785 
l0: 0.416412, l1: 0.412905, l2: 0.414963, l3: 0.418329, l4: 0.408679, l5: 0.435497, l6: 0.648711

[epoch:  33/100, batch:   604/  792, ite: 50654] train loss: 5.101994, tar: 0.536601 
l0: 0.445330, l1: 0.443969, l2: 0.444909, l3: 0.460645, l4: 0.491093, l5: 0.528376, l6: 0.620902

[epoch:  33/100, batch:   606/  792, ite: 50655] train loss: 5.100451, tar: 0.536462 
l0: 0.249248, l1: 0.250540, l2: 0.252899, l3: 0.258676, l4: 0.288055, l5: 0.395160, l6: 0.511138

[epoch:  33/100, batch:   608/  792, ite: 50656] train loss: 5.097063, tar: 0.536024 
l0: 0.432037, l1: 0.435691, l2: 0.438115, l3: 0.452574, l4: 0.479423, l5: 0.638215, l6: 0.867377

[epoch:  33/100, batch:   610/  792, ite: 50657] train loss: 5.096619, tar: 0.535866 
l0: 0.354894, l1: 0.354548, l2: 0.352171, l3: 0.349342, l4: 0.387697, l5: 0.508040, l6: 0.655427

[epoch:  33/100, batch:   612/  792, ite: 50658] train loss: 5.094614, tar: 0.53

[epoch:  33/100, batch:   692/  792, ite: 50698] train loss: 5.090247, tar: 0.534963 
l0: 0.518031, l1: 0.520931, l2: 0.520862, l3: 0.533948, l4: 0.575063, l5: 0.588180, l6: 0.723216

[epoch:  33/100, batch:   694/  792, ite: 50699] train loss: 5.089552, tar: 0.534938 
l0: 0.434956, l1: 0.434434, l2: 0.433793, l3: 0.431993, l4: 0.435463, l5: 0.488051, l6: 0.922989

[epoch:  33/100, batch:   696/  792, ite: 50700] train loss: 5.088715, tar: 0.534796 
l0: 0.390481, l1: 0.392644, l2: 0.394618, l3: 0.403552, l4: 0.442819, l5: 0.598649, l6: 0.802409

[epoch:  33/100, batch:   698/  792, ite: 50701] train loss: 5.087604, tar: 0.534590 
l0: 0.533414, l1: 0.539380, l2: 0.544062, l3: 0.552033, l4: 0.601203, l5: 0.716135, l6: 0.853561

[epoch:  33/100, batch:   700/  792, ite: 50702] train loss: 5.087802, tar: 0.534588 
l0: 0.342899, l1: 0.344288, l2: 0.345121, l3: 0.352038, l4: 0.380937, l5: 0.472836, l6: 0.582203

[epoch:  33/100, batch:   702/  792, ite: 50703] train loss: 5.085475, tar: 0.53

[epoch:  33/100, batch:   782/  792, ite: 50743] train loss: 5.090904, tar: 0.535009 
l0: 0.481308, l1: 0.486755, l2: 0.485003, l3: 0.488418, l4: 0.491677, l5: 0.604916, l6: 0.740743

[epoch:  33/100, batch:   784/  792, ite: 50744] train loss: 5.090106, tar: 0.534937 
l0: 0.588489, l1: 0.585754, l2: 0.581768, l3: 0.571876, l4: 0.582594, l5: 0.606002, l6: 0.730692

[epoch:  33/100, batch:   786/  792, ite: 50745] train loss: 5.089983, tar: 0.535009 
l0: 0.449022, l1: 0.446088, l2: 0.446931, l3: 0.447193, l4: 0.464308, l5: 0.627173, l6: 0.796828

[epoch:  33/100, batch:   788/  792, ite: 50746] train loss: 5.089119, tar: 0.534893 
l0: 0.450829, l1: 0.451901, l2: 0.449534, l3: 0.450430, l4: 0.471213, l5: 0.535497, l6: 0.675637

[epoch:  33/100, batch:   790/  792, ite: 50747] train loss: 5.087892, tar: 0.534781 
l0: 0.571584, l1: 0.564994, l2: 0.560803, l3: 0.558274, l4: 0.541446, l5: 0.591685, l6: 0.628106

[epoch:  33/100, batch:   792/  792, ite: 50748] train loss: 5.087365, tar: 0.53

l0: 0.486718, l1: 0.494501, l2: 0.494011, l3: 0.509620, l4: 0.540397, l5: 0.582245, l6: 0.648065

[epoch:  34/100, batch:    80/  792, ite: 50788] train loss: 5.094697, tar: 0.535291 
l0: 0.452904, l1: 0.455881, l2: 0.456663, l3: 0.470104, l4: 0.509971, l5: 0.595408, l6: 0.782740

[epoch:  34/100, batch:    82/  792, ite: 50789] train loss: 5.094115, tar: 0.535187 
l0: 0.368007, l1: 0.378072, l2: 0.375272, l3: 0.367648, l4: 0.372025, l5: 0.497425, l6: 0.542190

[epoch:  34/100, batch:    84/  792, ite: 50790] train loss: 5.092106, tar: 0.534975 
l0: 0.492513, l1: 0.486593, l2: 0.487362, l3: 0.489370, l4: 0.492414, l5: 0.589381, l6: 0.751729

[epoch:  34/100, batch:    86/  792, ite: 50791] train loss: 5.091380, tar: 0.534922 
l0: 0.861378, l1: 0.860848, l2: 0.859860, l3: 0.859464, l4: 0.890145, l5: 0.957603, l6: 1.060161

[epoch:  34/100, batch:    88/  792, ite: 50792] train loss: 5.094423, tar: 0.535334 
l0: 0.634366, l1: 0.642921, l2: 0.634802, l3: 0.641075, l4: 0.669731, l5: 0.7774

[epoch:  34/100, batch:   168/  792, ite: 50832] train loss: 5.077172, tar: 0.533377 
l0: 0.310469, l1: 0.310979, l2: 0.310920, l3: 0.311805, l4: 0.336310, l5: 0.444660, l6: 0.706292

[epoch:  34/100, batch:   170/  792, ite: 50833] train loss: 5.075187, tar: 0.533110 
l0: 0.510538, l1: 0.512371, l2: 0.507494, l3: 0.506327, l4: 0.505884, l5: 0.526927, l6: 0.615392

[epoch:  34/100, batch:   172/  792, ite: 50834] train loss: 5.074481, tar: 0.533083 
l0: 0.286269, l1: 0.292103, l2: 0.288760, l3: 0.299616, l4: 0.325050, l5: 0.445292, l6: 0.558726

[epoch:  34/100, batch:   174/  792, ite: 50835] train loss: 5.072114, tar: 0.532787 
l0: 1.083277, l1: 1.098528, l2: 1.098084, l3: 1.098949, l4: 1.128514, l5: 1.170070, l6: 1.283659

[epoch:  34/100, batch:   176/  792, ite: 50836] train loss: 5.077180, tar: 0.533446 
l0: 0.297307, l1: 0.299285, l2: 0.297820, l3: 0.303443, l4: 0.348666, l5: 0.453186, l6: 0.537480

[epoch:  34/100, batch:   178/  792, ite: 50837] train loss: 5.074795, tar: 0.53

[epoch:  34/100, batch:   258/  792, ite: 50877] train loss: 5.092567, tar: 0.535130 
l0: 0.459438, l1: 0.456590, l2: 0.455935, l3: 0.452396, l4: 0.451479, l5: 0.528108, l6: 0.509721

[epoch:  34/100, batch:   260/  792, ite: 50878] train loss: 5.091234, tar: 0.535044 
l0: 0.394154, l1: 0.399275, l2: 0.402179, l3: 0.415812, l4: 0.456195, l5: 0.566000, l6: 0.713210

[epoch:  34/100, batch:   262/  792, ite: 50879] train loss: 5.090137, tar: 0.534884 
l0: 0.406591, l1: 0.414983, l2: 0.410206, l3: 0.425724, l4: 0.460558, l5: 0.542077, l6: 0.625502

[epoch:  34/100, batch:   264/  792, ite: 50880] train loss: 5.088931, tar: 0.534738 
l0: 0.472049, l1: 0.478147, l2: 0.479330, l3: 0.484629, l4: 0.501764, l5: 0.616156, l6: 0.835243

[epoch:  34/100, batch:   266/  792, ite: 50881] train loss: 5.088523, tar: 0.534667 
l0: 0.677667, l1: 0.680779, l2: 0.681234, l3: 0.684542, l4: 0.671448, l5: 0.732895, l6: 0.777093

[epoch:  34/100, batch:   268/  792, ite: 50882] train loss: 5.089357, tar: 0.53

[epoch:  34/100, batch:   348/  792, ite: 50922] train loss: 5.088566, tar: 0.534407 
l0: 0.534366, l1: 0.535997, l2: 0.531618, l3: 0.532420, l4: 0.545601, l5: 0.639910, l6: 0.684444

[epoch:  34/100, batch:   350/  792, ite: 50923] train loss: 5.088103, tar: 0.534407 
l0: 0.316088, l1: 0.319442, l2: 0.319126, l3: 0.336195, l4: 0.380847, l5: 0.525110, l6: 0.627947

[epoch:  34/100, batch:   352/  792, ite: 50924] train loss: 5.086514, tar: 0.534171 
l0: 0.434034, l1: 0.440996, l2: 0.445198, l3: 0.459601, l4: 0.486249, l5: 0.579772, l6: 0.805203

[epoch:  34/100, batch:   354/  792, ite: 50925] train loss: 5.085853, tar: 0.534062 
l0: 0.716969, l1: 0.727547, l2: 0.730270, l3: 0.724619, l4: 0.718745, l5: 0.744736, l6: 0.843269

[epoch:  34/100, batch:   356/  792, ite: 50926] train loss: 5.086997, tar: 0.534260 
l0: 0.412986, l1: 0.414626, l2: 0.409637, l3: 0.417166, l4: 0.446891, l5: 0.460563, l6: 0.775064

[epoch:  34/100, batch:   358/  792, ite: 50927] train loss: 5.085944, tar: 0.53

[epoch:  34/100, batch:   438/  792, ite: 50967] train loss: 5.095951, tar: 0.535412 
l0: 0.502049, l1: 0.497619, l2: 0.497082, l3: 0.495426, l4: 0.504942, l5: 0.646300, l6: 0.811149

[epoch:  34/100, batch:   440/  792, ite: 50968] train loss: 5.095667, tar: 0.535377 
l0: 0.606282, l1: 0.613565, l2: 0.618580, l3: 0.615363, l4: 0.634295, l5: 0.640371, l6: 0.809134

[epoch:  34/100, batch:   442/  792, ite: 50969] train loss: 5.096043, tar: 0.535451 
l0: 0.359116, l1: 0.362712, l2: 0.356864, l3: 0.352831, l4: 0.378432, l5: 0.477118, l6: 0.548294

[epoch:  34/100, batch:   444/  792, ite: 50970] train loss: 5.094339, tar: 0.535269 
l0: 0.427186, l1: 0.429758, l2: 0.432689, l3: 0.436544, l4: 0.453992, l5: 0.576942, l6: 0.801489

[epoch:  34/100, batch:   446/  792, ite: 50971] train loss: 5.093679, tar: 0.535157 
l0: 0.396159, l1: 0.402826, l2: 0.402843, l3: 0.409759, l4: 0.425784, l5: 0.506292, l6: 0.696648

[epoch:  34/100, batch:   448/  792, ite: 50972] train loss: 5.092602, tar: 0.53

[epoch:  34/100, batch:   528/  792, ite: 51012] train loss: 5.064268, tar: 0.531340 
l0: 0.306597, l1: 0.311254, l2: 0.309472, l3: 0.316280, l4: 0.363901, l5: 0.450995, l6: 0.513569

[epoch:  34/100, batch:   530/  792, ite: 51013] train loss: 5.062407, tar: 0.531118 
l0: 0.663263, l1: 0.668370, l2: 0.666726, l3: 0.667957, l4: 0.695794, l5: 0.753769, l6: 0.805310

[epoch:  34/100, batch:   532/  792, ite: 51014] train loss: 5.063137, tar: 0.531249 
l0: 0.708765, l1: 0.712082, l2: 0.707402, l3: 0.701422, l4: 0.731091, l5: 0.770171, l6: 0.821999

[epoch:  34/100, batch:   534/  792, ite: 51015] train loss: 5.064115, tar: 0.531424 
l0: 0.467255, l1: 0.467997, l2: 0.468558, l3: 0.475321, l4: 0.516838, l5: 0.553129, l6: 0.659417

[epoch:  34/100, batch:   536/  792, ite: 51016] train loss: 5.063423, tar: 0.531361 
l0: 0.342017, l1: 0.340771, l2: 0.342571, l3: 0.344351, l4: 0.392295, l5: 0.450095, l6: 0.614506

[epoch:  34/100, batch:   538/  792, ite: 51017] train loss: 5.061842, tar: 0.53

[epoch:  34/100, batch:   618/  792, ite: 51057] train loss: 5.070264, tar: 0.532490 
l0: 0.240602, l1: 0.240560, l2: 0.240699, l3: 0.248628, l4: 0.286732, l5: 0.438412, l6: 0.547202

[epoch:  34/100, batch:   620/  792, ite: 51058] train loss: 5.068172, tar: 0.532215 
l0: 0.290619, l1: 0.289516, l2: 0.292536, l3: 0.307549, l4: 0.358416, l5: 0.512185, l6: 0.691930

[epoch:  34/100, batch:   622/  792, ite: 51059] train loss: 5.066728, tar: 0.531986 
l0: 0.807680, l1: 0.793903, l2: 0.787320, l3: 0.795646, l4: 0.787106, l5: 0.673922, l6: 0.829385

[epoch:  34/100, batch:   624/  792, ite: 51060] train loss: 5.068138, tar: 0.532247 
l0: 0.332611, l1: 0.334468, l2: 0.335721, l3: 0.340782, l4: 0.362294, l5: 0.467617, l6: 0.674553

[epoch:  34/100, batch:   626/  792, ite: 51061] train loss: 5.066748, tar: 0.532058 
l0: 0.404828, l1: 0.410648, l2: 0.411424, l3: 0.418671, l4: 0.462312, l5: 0.544439, l6: 0.770431

[epoch:  34/100, batch:   628/  792, ite: 51062] train loss: 5.066036, tar: 0.53

[epoch:  34/100, batch:   708/  792, ite: 51102] train loss: 5.064073, tar: 0.531688 
l0: 0.593059, l1: 0.594069, l2: 0.592586, l3: 0.592054, l4: 0.594778, l5: 0.656367, l6: 0.778253

[epoch:  34/100, batch:   710/  792, ite: 51103] train loss: 5.064242, tar: 0.531744 
l0: 0.371958, l1: 0.372861, l2: 0.369222, l3: 0.370866, l4: 0.418269, l5: 0.531922, l6: 0.921223

[epoch:  34/100, batch:   712/  792, ite: 51104] train loss: 5.063489, tar: 0.531599 
l0: 0.940856, l1: 0.973503, l2: 0.970822, l3: 0.955748, l4: 0.954060, l5: 1.073152, l6: 1.165589

[epoch:  34/100, batch:   714/  792, ite: 51105] train loss: 5.066293, tar: 0.531969 
l0: 0.501092, l1: 0.502748, l2: 0.507953, l3: 0.514175, l4: 0.530215, l5: 0.736589, l6: 1.018587

[epoch:  34/100, batch:   716/  792, ite: 51106] train loss: 5.066704, tar: 0.531941 
l0: 0.221651, l1: 0.223542, l2: 0.222044, l3: 0.226996, l4: 0.236133, l5: 0.336287, l6: 0.434752

[epoch:  34/100, batch:   718/  792, ite: 51107] train loss: 5.064284, tar: 0.53

l0: 0.604060, l1: 0.610956, l2: 0.610282, l3: 0.609979, l4: 0.610352, l5: 0.701104, l6: 0.860177

[epoch:  35/100, batch:     6/  792, ite: 51147] train loss: 5.072633, tar: 0.532813 
l0: 0.558947, l1: 0.565548, l2: 0.568512, l3: 0.567860, l4: 0.545115, l5: 0.710445, l6: 0.822839

[epoch:  35/100, batch:     8/  792, ite: 51148] train loss: 5.072796, tar: 0.532836 
l0: 0.541085, l1: 0.541553, l2: 0.540910, l3: 0.543581, l4: 0.545925, l5: 0.623812, l6: 0.849398

[epoch:  35/100, batch:    10/  792, ite: 51149] train loss: 5.072835, tar: 0.532843 
l0: 0.347954, l1: 0.349358, l2: 0.350629, l3: 0.348970, l4: 0.381199, l5: 0.570407, l6: 0.795130

[epoch:  35/100, batch:    12/  792, ite: 51150] train loss: 5.071888, tar: 0.532682 
l0: 0.508025, l1: 0.518817, l2: 0.521510, l3: 0.522859, l4: 0.510549, l5: 0.624399, l6: 0.823951

[epoch:  35/100, batch:    14/  792, ite: 51151] train loss: 5.071733, tar: 0.532661 
l0: 0.400802, l1: 0.404373, l2: 0.407690, l3: 0.413345, l4: 0.447061, l5: 0.5495

[epoch:  35/100, batch:    94/  792, ite: 51191] train loss: 5.076183, tar: 0.533043 
l0: 0.659706, l1: 0.660168, l2: 0.659573, l3: 0.664562, l4: 0.675284, l5: 0.647751, l6: 0.718032

[epoch:  35/100, batch:    96/  792, ite: 51192] train loss: 5.076427, tar: 0.533150 
l0: 0.778192, l1: 0.766730, l2: 0.768648, l3: 0.774612, l4: 0.778776, l5: 0.844297, l6: 1.100098

[epoch:  35/100, batch:    98/  792, ite: 51193] train loss: 5.077887, tar: 0.533355 
l0: 0.924969, l1: 0.934737, l2: 0.933561, l3: 0.933217, l4: 0.956294, l5: 0.969477, l6: 1.007151

[epoch:  35/100, batch:   100/  792, ite: 51194] train loss: 5.080147, tar: 0.533683 
l0: 0.436892, l1: 0.439834, l2: 0.439294, l3: 0.454226, l4: 0.467558, l5: 0.552262, l6: 0.626098

[epoch:  35/100, batch:   102/  792, ite: 51195] train loss: 5.079350, tar: 0.533602 
l0: 0.606548, l1: 0.616360, l2: 0.616150, l3: 0.618947, l4: 0.633963, l5: 0.705953, l6: 0.745122

[epoch:  35/100, batch:   104/  792, ite: 51196] train loss: 5.079632, tar: 0.53

[epoch:  35/100, batch:   184/  792, ite: 51236] train loss: 5.088883, tar: 0.534801 
l0: 0.513708, l1: 0.519472, l2: 0.518799, l3: 0.520692, l4: 0.538723, l5: 0.639988, l6: 0.736581

[epoch:  35/100, batch:   186/  792, ite: 51237] train loss: 5.088621, tar: 0.534783 
l0: 0.813311, l1: 0.824137, l2: 0.824546, l3: 0.823541, l4: 0.838746, l5: 0.931383, l6: 1.113830

[epoch:  35/100, batch:   188/  792, ite: 51238] train loss: 5.090390, tar: 0.535008 
l0: 0.433200, l1: 0.435005, l2: 0.433767, l3: 0.437109, l4: 0.436526, l5: 0.478645, l6: 0.853165

[epoch:  35/100, batch:   190/  792, ite: 51239] train loss: 5.089919, tar: 0.534926 
l0: 0.398076, l1: 0.401971, l2: 0.401223, l3: 0.405333, l4: 0.452040, l5: 0.595734, l6: 0.775763

[epoch:  35/100, batch:   192/  792, ite: 51240] train loss: 5.089217, tar: 0.534816 
l0: 0.385332, l1: 0.385059, l2: 0.384606, l3: 0.396884, l4: 0.426448, l5: 0.518305, l6: 0.693953

[epoch:  35/100, batch:   194/  792, ite: 51241] train loss: 5.088271, tar: 0.53

[epoch:  35/100, batch:   274/  792, ite: 51281] train loss: 5.077261, tar: 0.533348 
l0: 0.472121, l1: 0.472211, l2: 0.472602, l3: 0.472167, l4: 0.495178, l5: 0.576016, l6: 0.893104

[epoch:  35/100, batch:   276/  792, ite: 51282] train loss: 5.077008, tar: 0.533301 
l0: 0.729370, l1: 0.741340, l2: 0.740838, l3: 0.744201, l4: 0.805530, l5: 0.977854, l6: 1.149111

[epoch:  35/100, batch:   278/  792, ite: 51283] train loss: 5.078591, tar: 0.533453 
l0: 0.730247, l1: 0.727502, l2: 0.724573, l3: 0.732732, l4: 0.748490, l5: 0.798375, l6: 1.010209

[epoch:  35/100, batch:   280/  792, ite: 51284] train loss: 5.079736, tar: 0.533607 
l0: 0.347549, l1: 0.354477, l2: 0.354740, l3: 0.364102, l4: 0.396193, l5: 0.438945, l6: 0.569526

[epoch:  35/100, batch:   282/  792, ite: 51285] train loss: 5.078557, tar: 0.533462 
l0: 0.271674, l1: 0.273564, l2: 0.269675, l3: 0.278706, l4: 0.302599, l5: 0.402134, l6: 0.526204

[epoch:  35/100, batch:   284/  792, ite: 51286] train loss: 5.076897, tar: 0.53

[epoch:  35/100, batch:   364/  792, ite: 51326] train loss: 5.072821, tar: 0.532751 
l0: 0.360466, l1: 0.364457, l2: 0.364592, l3: 0.366067, l4: 0.406832, l5: 0.590734, l6: 0.749103

[epoch:  35/100, batch:   366/  792, ite: 51327] train loss: 5.072031, tar: 0.532622 
l0: 0.398654, l1: 0.403274, l2: 0.402958, l3: 0.410826, l4: 0.430787, l5: 0.592214, l6: 0.775622

[epoch:  35/100, batch:   368/  792, ite: 51328] train loss: 5.071423, tar: 0.532521 
l0: 0.366829, l1: 0.369078, l2: 0.364549, l3: 0.366841, l4: 0.376910, l5: 0.478112, l6: 0.592834

[epoch:  35/100, batch:   370/  792, ite: 51329] train loss: 5.070307, tar: 0.532396 
l0: 0.523404, l1: 0.523422, l2: 0.525729, l3: 0.532749, l4: 0.552533, l5: 0.674083, l6: 0.824483

[epoch:  35/100, batch:   372/  792, ite: 51330] train loss: 5.070272, tar: 0.532389 
l0: 0.431082, l1: 0.438456, l2: 0.438388, l3: 0.442540, l4: 0.478057, l5: 0.602084, l6: 0.785989

[epoch:  35/100, batch:   374/  792, ite: 51331] train loss: 5.069788, tar: 0.53

[epoch:  35/100, batch:   454/  792, ite: 51371] train loss: 5.067801, tar: 0.532232 
l0: 0.527765, l1: 0.521902, l2: 0.524961, l3: 0.534876, l4: 0.582442, l5: 0.755266, l6: 1.026905

[epoch:  35/100, batch:   456/  792, ite: 51372] train loss: 5.068126, tar: 0.532229 
l0: 1.019593, l1: 1.047671, l2: 1.049639, l3: 1.049767, l4: 1.059286, l5: 1.177183, l6: 1.297626

[epoch:  35/100, batch:   458/  792, ite: 51373] train loss: 5.071267, tar: 0.532584 
l0: 0.484269, l1: 0.481601, l2: 0.480288, l3: 0.494685, l4: 0.516963, l5: 0.635062, l6: 0.797731

[epoch:  35/100, batch:   460/  792, ite: 51374] train loss: 5.071135, tar: 0.532549 
l0: 0.599854, l1: 0.603068, l2: 0.603633, l3: 0.605566, l4: 0.618495, l5: 0.641340, l6: 1.301477

[epoch:  35/100, batch:   462/  792, ite: 51375] train loss: 5.072205, tar: 0.532598 
l0: 0.279702, l1: 0.278665, l2: 0.276281, l3: 0.294136, l4: 0.337834, l5: 0.475421, l6: 0.601639

[epoch:  35/100, batch:   464/  792, ite: 51376] train loss: 5.070879, tar: 0.53

[epoch:  35/100, batch:   544/  792, ite: 51416] train loss: 5.073176, tar: 0.532556 
l0: 0.720828, l1: 0.723323, l2: 0.718567, l3: 0.713864, l4: 0.748138, l5: 0.833794, l6: 1.191445

[epoch:  35/100, batch:   546/  792, ite: 51417] train loss: 5.074466, tar: 0.532689 
l0: 0.524245, l1: 0.527800, l2: 0.529550, l3: 0.524878, l4: 0.526188, l5: 0.627090, l6: 0.710000

[epoch:  35/100, batch:   548/  792, ite: 51418] train loss: 5.074223, tar: 0.532683 
l0: 0.320515, l1: 0.324664, l2: 0.322844, l3: 0.324879, l4: 0.342182, l5: 0.371637, l6: 0.467819

[epoch:  35/100, batch:   550/  792, ite: 51419] train loss: 5.072724, tar: 0.532533 
l0: 0.509240, l1: 0.513041, l2: 0.517058, l3: 0.511325, l4: 0.547042, l5: 0.593436, l6: 0.644191

[epoch:  35/100, batch:   552/  792, ite: 51420] train loss: 5.072353, tar: 0.532517 
l0: 0.388724, l1: 0.390414, l2: 0.383534, l3: 0.381728, l4: 0.399147, l5: 0.512161, l6: 0.736225

[epoch:  35/100, batch:   554/  792, ite: 51421] train loss: 5.071592, tar: 0.53

[epoch:  35/100, batch:   634/  792, ite: 51461] train loss: 5.059307, tar: 0.531006 
l0: 0.507169, l1: 0.513873, l2: 0.517044, l3: 0.515267, l4: 0.567554, l5: 0.630109, l6: 0.767080

[epoch:  35/100, batch:   636/  792, ite: 51462] train loss: 5.059160, tar: 0.530990 
l0: 0.363178, l1: 0.362149, l2: 0.363092, l3: 0.370506, l4: 0.416282, l5: 0.519706, l6: 0.657672

[epoch:  35/100, batch:   638/  792, ite: 51463] train loss: 5.058233, tar: 0.530875 
l0: 0.557887, l1: 0.562018, l2: 0.558184, l3: 0.555588, l4: 0.579632, l5: 0.665208, l6: 0.754940

[epoch:  35/100, batch:   640/  792, ite: 51464] train loss: 5.058187, tar: 0.530894 
l0: 0.220647, l1: 0.225040, l2: 0.227401, l3: 0.250007, l4: 0.295365, l5: 0.388907, l6: 0.624441

[epoch:  35/100, batch:   642/  792, ite: 51465] train loss: 5.056718, tar: 0.530682 
l0: 0.517579, l1: 0.515876, l2: 0.517505, l3: 0.521992, l4: 0.548176, l5: 0.623533, l6: 0.779447

[epoch:  35/100, batch:   644/  792, ite: 51466] train loss: 5.056576, tar: 0.53

[epoch:  35/100, batch:   724/  792, ite: 51506] train loss: 5.051588, tar: 0.529968 
l0: 0.750632, l1: 0.756673, l2: 0.757095, l3: 0.755718, l4: 0.752272, l5: 0.742909, l6: 0.868584

[epoch:  35/100, batch:   726/  792, ite: 51507] train loss: 5.052416, tar: 0.530114 
l0: 0.311354, l1: 0.315079, l2: 0.315130, l3: 0.325613, l4: 0.363988, l5: 0.425950, l6: 0.492257

[epoch:  35/100, batch:   728/  792, ite: 51508] train loss: 5.051146, tar: 0.529969 
l0: 0.270645, l1: 0.271793, l2: 0.272496, l3: 0.277739, l4: 0.305683, l5: 0.392670, l6: 0.578700

[epoch:  35/100, batch:   730/  792, ite: 51509] train loss: 5.049799, tar: 0.529797 
l0: 0.756919, l1: 0.752419, l2: 0.756778, l3: 0.752167, l4: 0.755519, l5: 0.771050, l6: 0.999350

[epoch:  35/100, batch:   732/  792, ite: 51510] train loss: 5.050934, tar: 0.529948 
l0: 0.389019, l1: 0.385486, l2: 0.378636, l3: 0.384768, l4: 0.402452, l5: 0.500989, l6: 0.725211

[epoch:  35/100, batch:   734/  792, ite: 51511] train loss: 5.050160, tar: 0.52

l0: 0.543611, l1: 0.549560, l2: 0.557251, l3: 0.558278, l4: 0.567962, l5: 0.645850, l6: 0.687073

[epoch:  36/100, batch:    22/  792, ite: 51551] train loss: 5.048971, tar: 0.529824 
l0: 0.668988, l1: 0.670817, l2: 0.671381, l3: 0.670160, l4: 0.655024, l5: 0.687044, l6: 0.830720

[epoch:  36/100, batch:    24/  792, ite: 51552] train loss: 5.049409, tar: 0.529914 
l0: 0.743752, l1: 0.745631, l2: 0.746269, l3: 0.754927, l4: 0.793703, l5: 0.957635, l6: 1.047505

[epoch:  36/100, batch:    26/  792, ite: 51553] train loss: 5.050609, tar: 0.530052 
l0: 0.294343, l1: 0.291828, l2: 0.294406, l3: 0.301868, l4: 0.348246, l5: 0.526358, l6: 0.621678

[epoch:  36/100, batch:    28/  792, ite: 51554] train loss: 5.049499, tar: 0.529900 
l0: 0.287751, l1: 0.291373, l2: 0.293492, l3: 0.292949, l4: 0.335694, l5: 0.507630, l6: 0.717232

[epoch:  36/100, batch:    30/  792, ite: 51555] train loss: 5.048458, tar: 0.529744 
l0: 0.552327, l1: 0.559340, l2: 0.555530, l3: 0.552089, l4: 0.571335, l5: 0.7149

[epoch:  36/100, batch:   110/  792, ite: 51595] train loss: 5.056066, tar: 0.530403 
l0: 0.522816, l1: 0.527102, l2: 0.527480, l3: 0.527034, l4: 0.522114, l5: 0.559026, l6: 0.806448

[epoch:  36/100, batch:   112/  792, ite: 51596] train loss: 5.055963, tar: 0.530398 
l0: 1.009209, l1: 1.020619, l2: 1.017534, l3: 1.029546, l4: 1.077916, l5: 0.988666, l6: 1.200143

[epoch:  36/100, batch:   114/  792, ite: 51597] train loss: 5.058181, tar: 0.530698 
l0: 0.362835, l1: 0.366498, l2: 0.366165, l3: 0.366540, l4: 0.404678, l5: 0.484535, l6: 0.638771

[epoch:  36/100, batch:   116/  792, ite: 51598] train loss: 5.057312, tar: 0.530593 
l0: 0.444050, l1: 0.443897, l2: 0.451735, l3: 0.451749, l4: 0.484526, l5: 0.644366, l6: 1.009037

[epoch:  36/100, batch:   118/  792, ite: 51599] train loss: 5.057283, tar: 0.530538 
l0: 0.592860, l1: 0.598598, l2: 0.596660, l3: 0.597959, l4: 0.596599, l5: 0.631042, l6: 0.764273

[epoch:  36/100, batch:   120/  792, ite: 51600] train loss: 5.057334, tar: 0.53

[epoch:  36/100, batch:   200/  792, ite: 51640] train loss: 5.054213, tar: 0.530342 
l0: 0.349809, l1: 0.352608, l2: 0.353334, l3: 0.354987, l4: 0.395094, l5: 0.427460, l6: 0.727633

[epoch:  36/100, batch:   202/  792, ite: 51641] train loss: 5.053412, tar: 0.530232 
l0: 0.342413, l1: 0.347561, l2: 0.348394, l3: 0.351368, l4: 0.397472, l5: 0.489689, l6: 0.585974

[epoch:  36/100, batch:   204/  792, ite: 51642] train loss: 5.052485, tar: 0.530118 
l0: 0.691223, l1: 0.697059, l2: 0.701131, l3: 0.720354, l4: 0.730454, l5: 0.819968, l6: 0.873201

[epoch:  36/100, batch:   206/  792, ite: 51643] train loss: 5.053123, tar: 0.530216 
l0: 0.270739, l1: 0.270813, l2: 0.272146, l3: 0.278100, l4: 0.308632, l5: 0.449318, l6: 0.598379

[epoch:  36/100, batch:   208/  792, ite: 51644] train loss: 5.051927, tar: 0.530058 
l0: 0.328480, l1: 0.328686, l2: 0.330089, l3: 0.335372, l4: 0.330403, l5: 0.405585, l6: 0.551981

[epoch:  36/100, batch:   210/  792, ite: 51645] train loss: 5.050812, tar: 0.52

[epoch:  36/100, batch:   290/  792, ite: 51685] train loss: 5.043606, tar: 0.529130 
l0: 0.951812, l1: 0.953042, l2: 0.956953, l3: 0.985765, l4: 1.011155, l5: 1.114176, l6: 1.018237

[epoch:  36/100, batch:   292/  792, ite: 51686] train loss: 5.045398, tar: 0.529381 
l0: 0.266196, l1: 0.262330, l2: 0.263280, l3: 0.273126, l4: 0.320071, l5: 0.453123, l6: 0.670880

[epoch:  36/100, batch:   294/  792, ite: 51687] train loss: 5.044284, tar: 0.529225 
l0: 0.474809, l1: 0.472201, l2: 0.473108, l3: 0.482729, l4: 0.503784, l5: 0.647782, l6: 0.749479

[epoch:  36/100, batch:   296/  792, ite: 51688] train loss: 5.044018, tar: 0.529193 
l0: 0.808740, l1: 0.810046, l2: 0.807693, l3: 0.809519, l4: 0.839714, l5: 0.972476, l6: 1.332741

[epoch:  36/100, batch:   298/  792, ite: 51689] train loss: 5.045587, tar: 0.529358 
l0: 0.578150, l1: 0.583925, l2: 0.581312, l3: 0.585866, l4: 0.608044, l5: 0.584251, l6: 0.590307

[epoch:  36/100, batch:   300/  792, ite: 51690] train loss: 5.045448, tar: 0.52

[epoch:  36/100, batch:   380/  792, ite: 51730] train loss: 5.037273, tar: 0.528401 
l0: 0.324225, l1: 0.326622, l2: 0.333517, l3: 0.338204, l4: 0.379031, l5: 0.526904, l6: 0.600154

[epoch:  36/100, batch:   382/  792, ite: 51731] train loss: 5.036350, tar: 0.528283 
l0: 0.576185, l1: 0.577316, l2: 0.577069, l3: 0.586535, l4: 0.602854, l5: 0.645007, l6: 1.095276

[epoch:  36/100, batch:   384/  792, ite: 51732] train loss: 5.036804, tar: 0.528311 
l0: 0.524952, l1: 0.527291, l2: 0.533528, l3: 0.532542, l4: 0.539896, l5: 0.609691, l6: 0.738749

[epoch:  36/100, batch:   386/  792, ite: 51733] train loss: 5.036726, tar: 0.528309 
l0: 0.604267, l1: 0.608958, l2: 0.609855, l3: 0.613601, l4: 0.599902, l5: 0.705164, l6: 0.912639

[epoch:  36/100, batch:   388/  792, ite: 51734] train loss: 5.037062, tar: 0.528353 
l0: 0.721353, l1: 0.736473, l2: 0.739887, l3: 0.723816, l4: 0.722292, l5: 0.770662, l6: 0.854279

[epoch:  36/100, batch:   390/  792, ite: 51735] train loss: 5.037744, tar: 0.52

[epoch:  36/100, batch:   470/  792, ite: 51775] train loss: 5.035535, tar: 0.527969 
l0: 0.491621, l1: 0.500771, l2: 0.499186, l3: 0.495893, l4: 0.543053, l5: 0.672083, l6: 0.694617

[epoch:  36/100, batch:   472/  792, ite: 51776] train loss: 5.035312, tar: 0.527948 
l0: 0.701783, l1: 0.695848, l2: 0.693451, l3: 0.692457, l4: 0.718098, l5: 0.865332, l6: 0.936769

[epoch:  36/100, batch:   474/  792, ite: 51777] train loss: 5.035964, tar: 0.528046 
l0: 0.395887, l1: 0.396385, l2: 0.398084, l3: 0.410928, l4: 0.439038, l5: 0.548076, l6: 0.640424

[epoch:  36/100, batch:   476/  792, ite: 51778] train loss: 5.035354, tar: 0.527972 
l0: 0.505291, l1: 0.501643, l2: 0.500951, l3: 0.508825, l4: 0.516129, l5: 0.548429, l6: 0.664479

[epoch:  36/100, batch:   478/  792, ite: 51779] train loss: 5.035014, tar: 0.527959 
l0: 0.353035, l1: 0.356774, l2: 0.360943, l3: 0.374124, l4: 0.412850, l5: 0.503907, l6: 0.659263

[epoch:  36/100, batch:   480/  792, ite: 51780] train loss: 5.034279, tar: 0.52

[epoch:  36/100, batch:   560/  792, ite: 51820] train loss: 5.043066, tar: 0.528911 
l0: 0.468725, l1: 0.470902, l2: 0.470914, l3: 0.469053, l4: 0.485303, l5: 0.541854, l6: 0.729397

[epoch:  36/100, batch:   562/  792, ite: 51821] train loss: 5.042755, tar: 0.528878 
l0: 0.406622, l1: 0.406988, l2: 0.406463, l3: 0.401731, l4: 0.418090, l5: 0.574513, l6: 0.702702

[epoch:  36/100, batch:   564/  792, ite: 51822] train loss: 5.042205, tar: 0.528811 
l0: 0.601478, l1: 0.604876, l2: 0.606564, l3: 0.608040, l4: 0.628510, l5: 0.700857, l6: 0.811458

[epoch:  36/100, batch:   566/  792, ite: 51823] train loss: 5.042494, tar: 0.528851 
l0: 0.494074, l1: 0.490397, l2: 0.491194, l3: 0.502722, l4: 0.540446, l5: 0.590690, l6: 0.668545

[epoch:  36/100, batch:   568/  792, ite: 51824] train loss: 5.042190, tar: 0.528832 
l0: 0.537042, l1: 0.536464, l2: 0.539832, l3: 0.544218, l4: 0.563232, l5: 0.690557, l6: 0.806280

[epoch:  36/100, batch:   570/  792, ite: 51825] train loss: 5.042257, tar: 0.52

[epoch:  36/100, batch:   650/  792, ite: 51865] train loss: 5.038432, tar: 0.528335 
l0: 0.607360, l1: 0.613870, l2: 0.602113, l3: 0.579414, l4: 0.571586, l5: 0.586165, l6: 0.897514

[epoch:  36/100, batch:   652/  792, ite: 51866] train loss: 5.038612, tar: 0.528377 
l0: 0.332060, l1: 0.334616, l2: 0.334408, l3: 0.346609, l4: 0.386624, l5: 0.446001, l6: 0.652999

[epoch:  36/100, batch:   654/  792, ite: 51867] train loss: 5.037803, tar: 0.528272 
l0: 0.324868, l1: 0.322128, l2: 0.321744, l3: 0.334448, l4: 0.370433, l5: 0.518288, l6: 0.680561

[epoch:  36/100, batch:   656/  792, ite: 51868] train loss: 5.037054, tar: 0.528163 
l0: 0.967610, l1: 0.979434, l2: 0.977277, l3: 0.992475, l4: 1.044021, l5: 1.147965, l6: 1.189821

[epoch:  36/100, batch:   658/  792, ite: 51869] train loss: 5.038863, tar: 0.528398 
l0: 0.249993, l1: 0.251349, l2: 0.252168, l3: 0.257363, l4: 0.300048, l5: 0.403123, l6: 0.516458

[epoch:  36/100, batch:   660/  792, ite: 51870] train loss: 5.037658, tar: 0.52

[epoch:  36/100, batch:   740/  792, ite: 51910] train loss: 5.039312, tar: 0.528493 
l0: 0.421420, l1: 0.423519, l2: 0.430847, l3: 0.442947, l4: 0.430852, l5: 0.437818, l6: 0.556201

[epoch:  36/100, batch:   742/  792, ite: 51911] train loss: 5.038691, tar: 0.528437 
l0: 0.375474, l1: 0.376040, l2: 0.378408, l3: 0.383020, l4: 0.392600, l5: 0.437686, l6: 0.548830

[epoch:  36/100, batch:   744/  792, ite: 51912] train loss: 5.037869, tar: 0.528357 
l0: 0.249166, l1: 0.250000, l2: 0.249108, l3: 0.260413, l4: 0.274402, l5: 0.365802, l6: 0.530387

[epoch:  36/100, batch:   746/  792, ite: 51913] train loss: 5.036677, tar: 0.528211 
l0: 0.384043, l1: 0.382437, l2: 0.384809, l3: 0.391927, l4: 0.390118, l5: 0.488726, l6: 0.602351

[epoch:  36/100, batch:   748/  792, ite: 51914] train loss: 5.035998, tar: 0.528136 
l0: 0.306636, l1: 0.305477, l2: 0.303666, l3: 0.316353, l4: 0.345438, l5: 0.438103, l6: 0.532724

[epoch:  36/100, batch:   750/  792, ite: 51915] train loss: 5.034984, tar: 0.52

l0: 0.542208, l1: 0.543946, l2: 0.549301, l3: 0.551427, l4: 0.587799, l5: 0.704063, l6: 0.839111

[epoch:  37/100, batch:    38/  792, ite: 51955] train loss: 5.035317, tar: 0.527986 
l0: 0.690091, l1: 0.699835, l2: 0.697199, l3: 0.693826, l4: 0.725425, l5: 0.777761, l6: 0.815505

[epoch:  37/100, batch:    40/  792, ite: 51956] train loss: 5.035820, tar: 0.528069 
l0: 0.595416, l1: 0.593805, l2: 0.593329, l3: 0.593322, l4: 0.622722, l5: 0.739436, l6: 0.877275

[epoch:  37/100, batch:    42/  792, ite: 51957] train loss: 5.036059, tar: 0.528103 
l0: 0.522336, l1: 0.521217, l2: 0.523633, l3: 0.521129, l4: 0.533540, l5: 0.588470, l6: 0.712171

[epoch:  37/100, batch:    44/  792, ite: 51958] train loss: 5.035910, tar: 0.528100 
l0: 0.367645, l1: 0.370249, l2: 0.366053, l3: 0.373356, l4: 0.405423, l5: 0.583988, l6: 0.722431

[epoch:  37/100, batch:    46/  792, ite: 51959] train loss: 5.035335, tar: 0.528018 
l0: 0.691202, l1: 0.693303, l2: 0.691382, l3: 0.710182, l4: 0.734750, l5: 0.8251

[epoch:  37/100, batch:   126/  792, ite: 51999] train loss: 5.034982, tar: 0.527857 
l0: 0.442520, l1: 0.444716, l2: 0.445361, l3: 0.457340, l4: 0.505695, l5: 0.609289, l6: 0.940515

[epoch:  37/100, batch:   128/  792, ite: 52000] train loss: 5.034880, tar: 0.527814 
l0: 0.482356, l1: 0.480049, l2: 0.481382, l3: 0.485439, l4: 0.514020, l5: 0.606056, l6: 0.795010

[epoch:  37/100, batch:   130/  792, ite: 52001] train loss: 4.750906, tar: 0.482356 
l0: 0.492496, l1: 0.494759, l2: 0.498354, l3: 0.503203, l4: 0.548846, l5: 0.677981, l6: 0.791107

[epoch:  37/100, batch:   132/  792, ite: 52002] train loss: 4.810509, tar: 0.487426 
l0: 0.620057, l1: 0.619690, l2: 0.619371, l3: 0.624638, l4: 0.643130, l5: 0.735160, l6: 0.815908

[epoch:  37/100, batch:   134/  792, ite: 52003] train loss: 5.067164, tar: 0.531636 
l0: 0.697133, l1: 0.704000, l2: 0.714219, l3: 0.734594, l4: 0.756474, l5: 0.833725, l6: 0.978914

[epoch:  37/100, batch:   136/  792, ite: 52004] train loss: 5.425044, tar: 0.57

[epoch:  37/100, batch:   216/  792, ite: 52044] train loss: 4.910776, tar: 0.516288 
l0: 0.407369, l1: 0.410586, l2: 0.414582, l3: 0.432671, l4: 0.472114, l5: 0.681385, l6: 0.849747

[epoch:  37/100, batch:   218/  792, ite: 52045] train loss: 4.902155, tar: 0.513868 
l0: 0.568191, l1: 0.574116, l2: 0.566883, l3: 0.545272, l4: 0.555845, l5: 0.593181, l6: 0.689799

[epoch:  37/100, batch:   220/  792, ite: 52046] train loss: 4.900178, tar: 0.515048 
l0: 0.640556, l1: 0.649701, l2: 0.647598, l3: 0.655588, l4: 0.719324, l5: 0.825538, l6: 1.016915

[epoch:  37/100, batch:   222/  792, ite: 52047] train loss: 4.928384, tar: 0.517719 
l0: 0.205033, l1: 0.206111, l2: 0.202667, l3: 0.221098, l4: 0.261062, l5: 0.308096, l6: 0.352757

[epoch:  37/100, batch:   224/  792, ite: 52048] train loss: 4.871565, tar: 0.511205 
l0: 0.574059, l1: 0.583194, l2: 0.580240, l3: 0.573562, l4: 0.587486, l5: 0.683664, l6: 0.860065

[epoch:  37/100, batch:   226/  792, ite: 52049] train loss: 4.880866, tar: 0.51

[epoch:  37/100, batch:   306/  792, ite: 52089] train loss: 5.095220, tar: 0.541720 
l0: 0.588568, l1: 0.594472, l2: 0.598538, l3: 0.589939, l4: 0.602714, l5: 0.801129, l6: 1.001312

[epoch:  37/100, batch:   308/  792, ite: 52090] train loss: 5.103739, tar: 0.542241 
l0: 0.320126, l1: 0.325200, l2: 0.324913, l3: 0.331362, l4: 0.369617, l5: 0.498812, l6: 0.654556

[epoch:  37/100, batch:   310/  792, ite: 52091] train loss: 5.086575, tar: 0.539800 
l0: 0.377881, l1: 0.377715, l2: 0.380132, l3: 0.390882, l4: 0.438993, l5: 0.519221, l6: 0.630709

[epoch:  37/100, batch:   312/  792, ite: 52092] train loss: 5.072193, tar: 0.538040 
l0: 0.384517, l1: 0.384070, l2: 0.386662, l3: 0.394152, l4: 0.414362, l5: 0.528789, l6: 0.683806

[epoch:  37/100, batch:   314/  792, ite: 52093] train loss: 5.060242, tar: 0.536389 
l0: 0.270137, l1: 0.274518, l2: 0.273240, l3: 0.278490, l4: 0.322823, l5: 0.391978, l6: 0.512421

[epoch:  37/100, batch:   316/  792, ite: 52094] train loss: 5.037448, tar: 0.53

[epoch:  37/100, batch:   396/  792, ite: 52134] train loss: 5.004813, tar: 0.527836 
l0: 0.490279, l1: 0.491715, l2: 0.485240, l3: 0.476707, l4: 0.477837, l5: 0.566687, l6: 0.740206

[epoch:  37/100, batch:   398/  792, ite: 52135] train loss: 5.001430, tar: 0.527558 
l0: 0.472702, l1: 0.482118, l2: 0.482116, l3: 0.482129, l4: 0.497912, l5: 0.567040, l6: 0.660984

[epoch:  37/100, batch:   400/  792, ite: 52136] train loss: 4.996538, tar: 0.527154 
l0: 0.731483, l1: 0.731422, l2: 0.722136, l3: 0.711252, l4: 0.748614, l5: 0.741658, l6: 0.907619

[epoch:  37/100, batch:   402/  792, ite: 52137] train loss: 5.005991, tar: 0.528646 
l0: 0.355131, l1: 0.355334, l2: 0.353036, l3: 0.355439, l4: 0.391062, l5: 0.476332, l6: 0.640837

[epoch:  37/100, batch:   404/  792, ite: 52138] train loss: 4.996200, tar: 0.527388 
l0: 0.670252, l1: 0.671020, l2: 0.671219, l3: 0.663657, l4: 0.679003, l5: 0.756881, l6: 0.859224

[epoch:  37/100, batch:   406/  792, ite: 52139] train loss: 5.002741, tar: 0.52

[epoch:  37/100, batch:   486/  792, ite: 52179] train loss: 5.054070, tar: 0.535385 
l0: 0.670742, l1: 0.676878, l2: 0.670582, l3: 0.676975, l4: 0.709691, l5: 0.876825, l6: 0.891798

[epoch:  37/100, batch:   488/  792, ite: 52180] train loss: 5.059959, tar: 0.536137 
l0: 0.214127, l1: 0.218731, l2: 0.215480, l3: 0.221256, l4: 0.253155, l5: 0.398794, l6: 0.449316

[epoch:  37/100, batch:   490/  792, ite: 52181] train loss: 5.045582, tar: 0.534358 
l0: 0.234539, l1: 0.240148, l2: 0.241935, l3: 0.249783, l4: 0.285508, l5: 0.374627, l6: 0.595760

[epoch:  37/100, batch:   492/  792, ite: 52182] train loss: 5.033632, tar: 0.532711 
l0: 0.533329, l1: 0.527441, l2: 0.530903, l3: 0.530273, l4: 0.518094, l5: 0.577688, l6: 0.757715

[epoch:  37/100, batch:   494/  792, ite: 52183] train loss: 5.032413, tar: 0.532714 
l0: 0.336548, l1: 0.337877, l2: 0.336381, l3: 0.341613, l4: 0.384651, l5: 0.495654, l6: 0.717630

[epoch:  37/100, batch:   496/  792, ite: 52184] train loss: 5.025068, tar: 0.53

[epoch:  37/100, batch:   576/  792, ite: 52224] train loss: 4.996191, tar: 0.527778 
l0: 0.404395, l1: 0.403814, l2: 0.403179, l3: 0.404875, l4: 0.437346, l5: 0.507685, l6: 0.633182

[epoch:  37/100, batch:   578/  792, ite: 52225] train loss: 4.991193, tar: 0.527230 
l0: 0.419183, l1: 0.423313, l2: 0.424276, l3: 0.427935, l4: 0.455839, l5: 0.638252, l6: 0.815091

[epoch:  37/100, batch:   580/  792, ite: 52226] train loss: 4.988825, tar: 0.526752 
l0: 0.589990, l1: 0.599663, l2: 0.602408, l3: 0.607564, l4: 0.636937, l5: 0.778193, l6: 0.926608

[epoch:  37/100, batch:   582/  792, ite: 52227] train loss: 4.992145, tar: 0.527030 
l0: 0.372080, l1: 0.374096, l2: 0.374477, l3: 0.376597, l4: 0.413777, l5: 0.490095, l6: 0.727151

[epoch:  37/100, batch:   584/  792, ite: 52228] train loss: 4.987685, tar: 0.526351 
l0: 0.491582, l1: 0.487978, l2: 0.488806, l3: 0.499486, l4: 0.552471, l5: 0.670364, l6: 0.804236

[epoch:  37/100, batch:   586/  792, ite: 52229] train loss: 4.986882, tar: 0.52

[epoch:  37/100, batch:   666/  792, ite: 52269] train loss: 4.958036, tar: 0.521967 
l0: 0.598711, l1: 0.594930, l2: 0.598080, l3: 0.593923, l4: 0.609695, l5: 0.678804, l6: 0.962521

[epoch:  37/100, batch:   668/  792, ite: 52270] train loss: 4.960891, tar: 0.522251 
l0: 0.227359, l1: 0.228293, l2: 0.230999, l3: 0.237482, l4: 0.258092, l5: 0.382399, l6: 0.554316

[epoch:  37/100, batch:   670/  792, ite: 52271] train loss: 4.952412, tar: 0.521163 
l0: 0.382908, l1: 0.385363, l2: 0.384065, l3: 0.392321, l4: 0.449323, l5: 0.568321, l6: 0.702185

[epoch:  37/100, batch:   672/  792, ite: 52272] train loss: 4.948930, tar: 0.520654 
l0: 0.542433, l1: 0.546660, l2: 0.548274, l3: 0.553616, l4: 0.585323, l5: 0.740662, l6: 0.949805

[epoch:  37/100, batch:   674/  792, ite: 52273] train loss: 4.950456, tar: 0.520734 
l0: 0.484061, l1: 0.484765, l2: 0.476510, l3: 0.490907, l4: 0.535779, l5: 0.637885, l6: 1.020071

[epoch:  37/100, batch:   676/  792, ite: 52274] train loss: 4.952328, tar: 0.52

[epoch:  37/100, batch:   756/  792, ite: 52314] train loss: 4.973601, tar: 0.523579 
l0: 0.420540, l1: 0.417867, l2: 0.418934, l3: 0.424035, l4: 0.440931, l5: 0.571980, l6: 0.769696

[epoch:  37/100, batch:   758/  792, ite: 52315] train loss: 4.971426, tar: 0.523252 
l0: 0.553718, l1: 0.555669, l2: 0.559761, l3: 0.568315, l4: 0.627642, l5: 0.714443, l6: 0.901740

[epoch:  37/100, batch:   760/  792, ite: 52316] train loss: 4.973273, tar: 0.523348 
l0: 0.299848, l1: 0.313200, l2: 0.315486, l3: 0.325991, l4: 0.336978, l5: 0.457954, l6: 0.559862

[epoch:  37/100, batch:   762/  792, ite: 52317] train loss: 4.968202, tar: 0.522643 
l0: 0.636316, l1: 0.639341, l2: 0.643014, l3: 0.665949, l4: 0.713898, l5: 0.727545, l6: 0.840309

[epoch:  37/100, batch:   764/  792, ite: 52318] train loss: 4.970723, tar: 0.523000 
l0: 0.364644, l1: 0.364110, l2: 0.362605, l3: 0.366647, l4: 0.375787, l5: 0.525373, l6: 0.748716

[epoch:  37/100, batch:   766/  792, ite: 52319] train loss: 4.967344, tar: 0.52

l0: 0.491902, l1: 0.485191, l2: 0.484608, l3: 0.483984, l4: 0.488718, l5: 0.561883, l6: 0.727042

[epoch:  38/100, batch:    54/  792, ite: 52359] train loss: 4.923896, tar: 0.518061 
l0: 0.516851, l1: 0.519250, l2: 0.523186, l3: 0.526572, l4: 0.563296, l5: 0.675156, l6: 0.831441

[epoch:  38/100, batch:    56/  792, ite: 52360] train loss: 4.924471, tar: 0.518058 
l0: 0.337484, l1: 0.341704, l2: 0.341584, l3: 0.345737, l4: 0.348835, l5: 0.406738, l6: 0.503010

[epoch:  38/100, batch:    58/  792, ite: 52361] train loss: 4.919495, tar: 0.517558 
l0: 0.320459, l1: 0.323183, l2: 0.318033, l3: 0.323068, l4: 0.340383, l5: 0.426565, l6: 0.539158

[epoch:  38/100, batch:    60/  792, ite: 52362] train loss: 4.914581, tar: 0.517013 
l0: 0.469536, l1: 0.469599, l2: 0.471508, l3: 0.474497, l4: 0.489013, l5: 0.524130, l6: 0.751516

[epoch:  38/100, batch:    62/  792, ite: 52363] train loss: 4.913058, tar: 0.516882 
l0: 0.257525, l1: 0.262996, l2: 0.260622, l3: 0.264161, l4: 0.284464, l5: 0.5046

[epoch:  38/100, batch:   142/  792, ite: 52403] train loss: 4.897875, tar: 0.514071 
l0: 0.419506, l1: 0.416001, l2: 0.415147, l3: 0.420999, l4: 0.441408, l5: 0.519453, l6: 0.725661

[epoch:  38/100, batch:   144/  792, ite: 52404] train loss: 4.895993, tar: 0.513836 
l0: 0.591824, l1: 0.589524, l2: 0.591984, l3: 0.608568, l4: 0.636974, l5: 0.666179, l6: 0.776548

[epoch:  38/100, batch:   146/  792, ite: 52405] train loss: 4.897115, tar: 0.514029 
l0: 0.888196, l1: 0.879952, l2: 0.881174, l3: 0.889544, l4: 0.890351, l5: 0.866155, l6: 0.977134

[epoch:  38/100, batch:   148/  792, ite: 52406] train loss: 4.903140, tar: 0.514951 
l0: 0.403439, l1: 0.403554, l2: 0.401927, l3: 0.407724, l4: 0.466882, l5: 0.499472, l6: 0.667406

[epoch:  38/100, batch:   150/  792, ite: 52407] train loss: 4.900905, tar: 0.514677 
l0: 0.886260, l1: 0.893580, l2: 0.891590, l3: 0.903370, l4: 0.929236, l5: 0.921275, l6: 1.004206

[epoch:  38/100, batch:   152/  792, ite: 52408] train loss: 4.907419, tar: 0.51

[epoch:  38/100, batch:   232/  792, ite: 52448] train loss: 4.901592, tar: 0.513969 
l0: 0.531206, l1: 0.531209, l2: 0.533422, l3: 0.527223, l4: 0.562115, l5: 0.662312, l6: 0.842568

[epoch:  38/100, batch:   234/  792, ite: 52449] train loss: 4.902020, tar: 0.514007 
l0: 0.811828, l1: 0.789292, l2: 0.787127, l3: 0.789900, l4: 0.797470, l5: 0.827778, l6: 0.836069

[epoch:  38/100, batch:   236/  792, ite: 52450] train loss: 4.905775, tar: 0.514669 
l0: 0.525341, l1: 0.530529, l2: 0.530414, l3: 0.531644, l4: 0.535843, l5: 0.575671, l6: 0.750761

[epoch:  38/100, batch:   238/  792, ite: 52451] train loss: 4.905576, tar: 0.514693 
l0: 0.677389, l1: 0.676165, l2: 0.676994, l3: 0.670555, l4: 0.700166, l5: 0.812923, l6: 0.997426

[epoch:  38/100, batch:   240/  792, ite: 52452] train loss: 4.908362, tar: 0.515052 
l0: 0.641889, l1: 0.645004, l2: 0.639538, l3: 0.626522, l4: 0.612840, l5: 0.630640, l6: 0.873728

[epoch:  38/100, batch:   242/  792, ite: 52453] train loss: 4.909976, tar: 0.51

[epoch:  38/100, batch:   322/  792, ite: 52493] train loss: 4.939530, tar: 0.519158 
l0: 0.379220, l1: 0.384145, l2: 0.387798, l3: 0.391356, l4: 0.402307, l5: 0.498508, l6: 0.767107

[epoch:  38/100, batch:   324/  792, ite: 52494] train loss: 4.937575, tar: 0.518875 
l0: 0.316097, l1: 0.320774, l2: 0.315007, l3: 0.318781, l4: 0.371722, l5: 0.523599, l6: 0.757469

[epoch:  38/100, batch:   326/  792, ite: 52495] train loss: 4.934828, tar: 0.518465 
l0: 0.476399, l1: 0.481223, l2: 0.481713, l3: 0.486682, l4: 0.487386, l5: 0.560993, l6: 0.692764

[epoch:  38/100, batch:   328/  792, ite: 52496] train loss: 4.933763, tar: 0.518380 
l0: 0.519014, l1: 0.521516, l2: 0.522702, l3: 0.521539, l4: 0.535792, l5: 0.571465, l6: 0.561823

[epoch:  38/100, batch:   330/  792, ite: 52497] train loss: 4.932773, tar: 0.518382 
l0: 0.217477, l1: 0.221361, l2: 0.219531, l3: 0.229929, l4: 0.287761, l5: 0.427410, l6: 0.555522

[epoch:  38/100, batch:   332/  792, ite: 52498] train loss: 4.928355, tar: 0.51

[epoch:  38/100, batch:   412/  792, ite: 52538] train loss: 4.948474, tar: 0.519860 
l0: 0.457458, l1: 0.461029, l2: 0.462528, l3: 0.468080, l4: 0.480341, l5: 0.678344, l6: 0.800224

[epoch:  38/100, batch:   414/  792, ite: 52539] train loss: 4.947982, tar: 0.519744 
l0: 1.097104, l1: 1.113511, l2: 1.106119, l3: 1.124316, l4: 1.193129, l5: 1.308852, l6: 1.563194

[epoch:  38/100, batch:   416/  792, ite: 52540] train loss: 4.957798, tar: 0.520813 
l0: 0.481621, l1: 0.481897, l2: 0.482026, l3: 0.494851, l4: 0.546585, l5: 0.688705, l6: 0.815149

[epoch:  38/100, batch:   418/  792, ite: 52541] train loss: 4.957733, tar: 0.520741 
l0: 0.291671, l1: 0.295823, l2: 0.296428, l3: 0.301689, l4: 0.354365, l5: 0.401387, l6: 0.579049

[epoch:  38/100, batch:   420/  792, ite: 52542] train loss: 4.954498, tar: 0.520318 
l0: 0.434200, l1: 0.438238, l2: 0.437356, l3: 0.450642, l4: 0.486653, l5: 0.560512, l6: 0.660034

[epoch:  38/100, batch:   422/  792, ite: 52543] train loss: 4.953075, tar: 0.52

[epoch:  38/100, batch:   502/  792, ite: 52583] train loss: 4.982143, tar: 0.524121 
l0: 0.232227, l1: 0.231594, l2: 0.234344, l3: 0.238565, l4: 0.265057, l5: 0.383311, l6: 0.564661

[epoch:  38/100, batch:   504/  792, ite: 52584] train loss: 4.978451, tar: 0.523621 
l0: 0.415783, l1: 0.426264, l2: 0.426539, l3: 0.434387, l4: 0.471577, l5: 0.599131, l6: 0.844199

[epoch:  38/100, batch:   506/  792, ite: 52585] train loss: 4.977841, tar: 0.523437 
l0: 0.372399, l1: 0.382516, l2: 0.382500, l3: 0.394741, l4: 0.409267, l5: 0.492067, l6: 0.533586

[epoch:  38/100, batch:   508/  792, ite: 52586] train loss: 4.975474, tar: 0.523179 
l0: 0.292734, l1: 0.291583, l2: 0.293078, l3: 0.301082, l4: 0.324749, l5: 0.448681, l6: 0.593053

[epoch:  38/100, batch:   510/  792, ite: 52587] train loss: 4.972361, tar: 0.522787 
l0: 0.522886, l1: 0.522674, l2: 0.526781, l3: 0.531601, l4: 0.551135, l5: 0.645811, l6: 0.769531

[epoch:  38/100, batch:   512/  792, ite: 52588] train loss: 4.972286, tar: 0.52

[epoch:  38/100, batch:   592/  792, ite: 52628] train loss: 4.975630, tar: 0.522696 
l0: 0.548930, l1: 0.540666, l2: 0.541272, l3: 0.555332, l4: 0.600739, l5: 0.715252, l6: 1.011166

[epoch:  38/100, batch:   594/  792, ite: 52629] train loss: 4.976600, tar: 0.522738 
l0: 0.317608, l1: 0.317922, l2: 0.316635, l3: 0.328231, l4: 0.340662, l5: 0.417050, l6: 0.507483

[epoch:  38/100, batch:   596/  792, ite: 52630] train loss: 4.973562, tar: 0.522412 
l0: 0.472867, l1: 0.477496, l2: 0.478766, l3: 0.481164, l4: 0.516171, l5: 0.594133, l6: 0.977918

[epoch:  38/100, batch:   598/  792, ite: 52631] train loss: 4.973768, tar: 0.522333 
l0: 1.136816, l1: 1.136531, l2: 1.136257, l3: 1.157945, l4: 1.186301, l5: 1.228423, l6: 1.148320

[epoch:  38/100, batch:   600/  792, ite: 52632] train loss: 4.980585, tar: 0.523306 
l0: 0.586719, l1: 0.596024, l2: 0.598327, l3: 0.597674, l4: 0.611453, l5: 0.663447, l6: 1.086371

[epoch:  38/100, batch:   602/  792, ite: 52633] train loss: 4.982129, tar: 0.52

[epoch:  38/100, batch:   682/  792, ite: 52673] train loss: 4.973505, tar: 0.521711 
l0: 0.889234, l1: 0.889922, l2: 0.888940, l3: 0.884251, l4: 0.916403, l5: 0.986372, l6: 1.127757

[epoch:  38/100, batch:   684/  792, ite: 52674] train loss: 4.977666, tar: 0.522256 
l0: 0.861389, l1: 0.865976, l2: 0.885261, l3: 0.911752, l4: 0.966755, l5: 1.127671, l6: 1.135250

[epoch:  38/100, batch:   686/  792, ite: 52675] train loss: 4.982135, tar: 0.522758 
l0: 0.255229, l1: 0.260627, l2: 0.263129, l3: 0.257295, l4: 0.251249, l5: 0.373781, l6: 0.479790

[epoch:  38/100, batch:   688/  792, ite: 52676] train loss: 4.978637, tar: 0.522362 
l0: 0.291764, l1: 0.300298, l2: 0.305375, l3: 0.313819, l4: 0.342275, l5: 0.475787, l6: 0.593196

[epoch:  38/100, batch:   690/  792, ite: 52677] train loss: 4.976119, tar: 0.522022 
l0: 0.758059, l1: 0.766161, l2: 0.767298, l3: 0.773067, l4: 0.800057, l5: 0.847773, l6: 1.381307

[epoch:  38/100, batch:   692/  792, ite: 52678] train loss: 4.979438, tar: 0.52

[epoch:  38/100, batch:   772/  792, ite: 52718] train loss: 4.985220, tar: 0.522925 
l0: 0.551813, l1: 0.553900, l2: 0.561218, l3: 0.567047, l4: 0.563366, l5: 0.718318, l6: 0.774387

[epoch:  38/100, batch:   774/  792, ite: 52719] train loss: 4.985455, tar: 0.522965 
l0: 0.376872, l1: 0.376100, l2: 0.374839, l3: 0.381642, l4: 0.402563, l5: 0.520215, l6: 0.640874

[epoch:  38/100, batch:   776/  792, ite: 52720] train loss: 4.983662, tar: 0.522762 
l0: 0.514620, l1: 0.517309, l2: 0.516425, l3: 0.516876, l4: 0.548227, l5: 0.623237, l6: 0.863962

[epoch:  38/100, batch:   778/  792, ite: 52721] train loss: 4.983893, tar: 0.522751 
l0: 0.284967, l1: 0.290181, l2: 0.293471, l3: 0.296737, l4: 0.347597, l5: 0.430066, l6: 0.635228

[epoch:  38/100, batch:   780/  792, ite: 52722] train loss: 4.981426, tar: 0.522421 
l0: 0.286523, l1: 0.289679, l2: 0.291387, l3: 0.288777, l4: 0.310682, l5: 0.395456, l6: 0.530295

[epoch:  38/100, batch:   782/  792, ite: 52723] train loss: 4.978643, tar: 0.52

l0: 0.439798, l1: 0.438357, l2: 0.438045, l3: 0.443876, l4: 0.469974, l5: 0.556530, l6: 0.674780

[epoch:  39/100, batch:    70/  792, ite: 52763] train loss: 4.956775, tar: 0.519876 
l0: 0.644349, l1: 0.641813, l2: 0.643076, l3: 0.642238, l4: 0.664107, l5: 0.796800, l6: 0.759027

[epoch:  39/100, batch:    72/  792, ite: 52764] train loss: 4.957652, tar: 0.520039 
l0: 0.599995, l1: 0.602132, l2: 0.601571, l3: 0.609647, l4: 0.658285, l5: 0.785376, l6: 1.053770

[epoch:  39/100, batch:    74/  792, ite: 52765] train loss: 4.959041, tar: 0.520143 
l0: 0.217870, l1: 0.222232, l2: 0.217939, l3: 0.220167, l4: 0.258137, l5: 0.332646, l6: 0.395585

[epoch:  39/100, batch:    76/  792, ite: 52766] train loss: 4.955598, tar: 0.519749 
l0: 0.662813, l1: 0.673143, l2: 0.669660, l3: 0.672691, l4: 0.673339, l5: 0.718793, l6: 0.937347

[epoch:  39/100, batch:    78/  792, ite: 52767] train loss: 4.956978, tar: 0.519935 
l0: 0.295176, l1: 0.292983, l2: 0.293525, l3: 0.307435, l4: 0.337716, l5: 0.4675

[epoch:  39/100, batch:   158/  792, ite: 52807] train loss: 4.942843, tar: 0.518440 
l0: 0.577796, l1: 0.577013, l2: 0.577303, l3: 0.577918, l4: 0.621170, l5: 0.771868, l6: 0.982448

[epoch:  39/100, batch:   160/  792, ite: 52808] train loss: 4.943743, tar: 0.518514 
l0: 0.297921, l1: 0.300718, l2: 0.303859, l3: 0.309475, l4: 0.329336, l5: 0.409907, l6: 0.620673

[epoch:  39/100, batch:   162/  792, ite: 52809] train loss: 4.941596, tar: 0.518241 
l0: 0.519709, l1: 0.523338, l2: 0.523521, l3: 0.523962, l4: 0.550839, l5: 0.713314, l6: 0.819688

[epoch:  39/100, batch:   164/  792, ite: 52810] train loss: 4.941798, tar: 0.518243 
l0: 0.356169, l1: 0.361789, l2: 0.359222, l3: 0.370911, l4: 0.430244, l5: 0.516882, l6: 0.614633

[epoch:  39/100, batch:   166/  792, ite: 52811] train loss: 4.940477, tar: 0.518043 
l0: 0.471096, l1: 0.463786, l2: 0.458751, l3: 0.445847, l4: 0.507781, l5: 0.634416, l6: 0.732798

[epoch:  39/100, batch:   168/  792, ite: 52812] train loss: 4.939906, tar: 0.51

[epoch:  39/100, batch:   248/  792, ite: 52852] train loss: 4.953413, tar: 0.519180 
l0: 0.886777, l1: 0.903473, l2: 0.901825, l3: 0.902637, l4: 0.906483, l5: 1.058902, l6: 1.257425

[epoch:  39/100, batch:   250/  792, ite: 52853] train loss: 4.957185, tar: 0.519611 
l0: 0.431774, l1: 0.429883, l2: 0.432270, l3: 0.445646, l4: 0.515789, l5: 0.609509, l6: 0.836413

[epoch:  39/100, batch:   252/  792, ite: 52854] train loss: 4.956739, tar: 0.519508 
l0: 0.487395, l1: 0.485639, l2: 0.488184, l3: 0.496313, l4: 0.514437, l5: 0.646532, l6: 0.918666

[epoch:  39/100, batch:   254/  792, ite: 52855] train loss: 4.956873, tar: 0.519470 
l0: 0.701765, l1: 0.704408, l2: 0.702180, l3: 0.702649, l4: 0.730585, l5: 0.893203, l6: 1.195898

[epoch:  39/100, batch:   256/  792, ite: 52856] train loss: 4.959347, tar: 0.519683 
l0: 0.823393, l1: 0.819497, l2: 0.819894, l3: 0.816457, l4: 0.854119, l5: 0.950839, l6: 1.101413

[epoch:  39/100, batch:   258/  792, ite: 52857] train loss: 4.962136, tar: 0.52

[epoch:  39/100, batch:   338/  792, ite: 52897] train loss: 4.961801, tar: 0.519398 
l0: 0.500942, l1: 0.501663, l2: 0.499648, l3: 0.505624, l4: 0.532823, l5: 0.600292, l6: 0.739506

[epoch:  39/100, batch:   340/  792, ite: 52898] train loss: 4.961515, tar: 0.519377 
l0: 0.302800, l1: 0.305351, l2: 0.303458, l3: 0.297011, l4: 0.299959, l5: 0.405981, l6: 0.527287

[epoch:  39/100, batch:   342/  792, ite: 52899] train loss: 4.959368, tar: 0.519136 
l0: 0.448832, l1: 0.452289, l2: 0.452028, l3: 0.455180, l4: 0.467404, l5: 0.557032, l6: 0.803102

[epoch:  39/100, batch:   344/  792, ite: 52900] train loss: 4.958959, tar: 0.519058 
l0: 0.535365, l1: 0.535808, l2: 0.539721, l3: 0.537927, l4: 0.565243, l5: 0.734154, l6: 0.835266

[epoch:  39/100, batch:   346/  792, ite: 52901] train loss: 4.959296, tar: 0.519076 
l0: 0.527751, l1: 0.531269, l2: 0.528836, l3: 0.538118, l4: 0.544054, l5: 0.610240, l6: 0.702296

[epoch:  39/100, batch:   348/  792, ite: 52902] train loss: 4.959052, tar: 0.51

[epoch:  39/100, batch:   428/  792, ite: 52942] train loss: 4.965801, tar: 0.519887 
l0: 0.851338, l1: 0.851393, l2: 0.849882, l3: 0.850208, l4: 0.818035, l5: 0.860195, l6: 0.992024

[epoch:  39/100, batch:   430/  792, ite: 52943] train loss: 4.968204, tar: 0.520238 
l0: 0.222752, l1: 0.227592, l2: 0.226685, l3: 0.226584, l4: 0.275726, l5: 0.407041, l6: 0.472809

[epoch:  39/100, batch:   432/  792, ite: 52944] train loss: 4.965654, tar: 0.519923 
l0: 0.716935, l1: 0.730220, l2: 0.724732, l3: 0.718398, l4: 0.744384, l5: 0.878221, l6: 1.032846

[epoch:  39/100, batch:   434/  792, ite: 52945] train loss: 4.967608, tar: 0.520132 
l0: 0.668141, l1: 0.677280, l2: 0.667122, l3: 0.653522, l4: 0.681222, l5: 0.652462, l6: 0.669436

[epoch:  39/100, batch:   436/  792, ite: 52946] train loss: 4.968086, tar: 0.520288 
l0: 0.844401, l1: 0.850907, l2: 0.844433, l3: 0.840254, l4: 0.871456, l5: 0.900466, l6: 1.400838

[epoch:  39/100, batch:   438/  792, ite: 52947] train loss: 4.971342, tar: 0.52

[epoch:  39/100, batch:   518/  792, ite: 52987] train loss: 4.964374, tar: 0.519687 
l0: 0.314479, l1: 0.316912, l2: 0.314444, l3: 0.319579, l4: 0.340101, l5: 0.410329, l6: 0.563567

[epoch:  39/100, batch:   520/  792, ite: 52988] train loss: 4.962524, tar: 0.519480 
l0: 0.544907, l1: 0.541941, l2: 0.541110, l3: 0.559053, l4: 0.611998, l5: 0.738818, l6: 0.810289

[epoch:  39/100, batch:   522/  792, ite: 52989] train loss: 4.962920, tar: 0.519505 
l0: 0.344003, l1: 0.345538, l2: 0.343967, l3: 0.343565, l4: 0.391773, l5: 0.523304, l6: 0.715799

[epoch:  39/100, batch:   524/  792, ite: 52990] train loss: 4.961746, tar: 0.519328 
l0: 0.622549, l1: 0.626146, l2: 0.629950, l3: 0.641696, l4: 0.656234, l5: 0.848762, l6: 0.899601

[epoch:  39/100, batch:   526/  792, ite: 52991] train loss: 4.962673, tar: 0.519432 
l0: 0.214038, l1: 0.223214, l2: 0.228799, l3: 0.243554, l4: 0.272733, l5: 0.424765, l6: 0.672808

[epoch:  39/100, batch:   528/  792, ite: 52992] train loss: 4.960669, tar: 0.51

[epoch:  39/100, batch:   608/  792, ite: 53032] train loss: 4.964285, tar: 0.519292 
l0: 0.273599, l1: 0.276217, l2: 0.275474, l3: 0.276792, l4: 0.310833, l5: 0.365953, l6: 0.513383

[epoch:  39/100, batch:   610/  792, ite: 53033] train loss: 4.962173, tar: 0.519054 
l0: 0.414447, l1: 0.418354, l2: 0.415449, l3: 0.412571, l4: 0.422025, l5: 0.507275, l6: 0.566873

[epoch:  39/100, batch:   612/  792, ite: 53034] train loss: 4.961056, tar: 0.518953 
l0: 0.543903, l1: 0.544623, l2: 0.547389, l3: 0.543711, l4: 0.522195, l5: 0.613525, l6: 0.734303

[epoch:  39/100, batch:   614/  792, ite: 53035] train loss: 4.960999, tar: 0.518977 
l0: 0.599531, l1: 0.598418, l2: 0.598241, l3: 0.598981, l4: 0.605192, l5: 0.692020, l6: 0.717632

[epoch:  39/100, batch:   616/  792, ite: 53036] train loss: 4.961140, tar: 0.519055 
l0: 0.435788, l1: 0.435934, l2: 0.437100, l3: 0.438402, l4: 0.478717, l5: 0.525176, l6: 0.633403

[epoch:  39/100, batch:   618/  792, ite: 53037] train loss: 4.960374, tar: 0.51

[epoch:  39/100, batch:   698/  792, ite: 53077] train loss: 4.957662, tar: 0.518791 
l0: 0.283686, l1: 0.285227, l2: 0.284160, l3: 0.290139, l4: 0.319208, l5: 0.414464, l6: 0.524934

[epoch:  39/100, batch:   700/  792, ite: 53078] train loss: 4.955941, tar: 0.518573 
l0: 0.240803, l1: 0.246709, l2: 0.247407, l3: 0.261357, l4: 0.303995, l5: 0.427800, l6: 0.567112

[epoch:  39/100, batch:   702/  792, ite: 53079] train loss: 4.954070, tar: 0.518316 
l0: 0.286306, l1: 0.293136, l2: 0.295191, l3: 0.301771, l4: 0.336323, l5: 0.533246, l6: 0.627142

[epoch:  39/100, batch:   704/  792, ite: 53080] train loss: 4.952728, tar: 0.518101 
l0: 0.413863, l1: 0.415343, l2: 0.414279, l3: 0.418289, l4: 0.435570, l5: 0.512354, l6: 0.645666

[epoch:  39/100, batch:   706/  792, ite: 53081] train loss: 4.951817, tar: 0.518005 
l0: 0.346979, l1: 0.356707, l2: 0.360060, l3: 0.364608, l4: 0.404766, l5: 0.605717, l6: 0.699203

[epoch:  39/100, batch:   708/  792, ite: 53082] train loss: 4.950789, tar: 0.51

[epoch:  39/100, batch:   788/  792, ite: 53122] train loss: 4.950023, tar: 0.517702 
l0: 0.808672, l1: 0.816035, l2: 0.805997, l3: 0.808153, l4: 0.831045, l5: 0.873315, l6: 0.889389

[epoch:  39/100, batch:   790/  792, ite: 53123] train loss: 4.951686, tar: 0.517961 
l0: 0.404229, l1: 0.407231, l2: 0.409786, l3: 0.404299, l4: 0.423039, l5: 0.512457, l6: 0.718485

[epoch:  39/100, batch:   792/  792, ite: 53124] train loss: 4.950838, tar: 0.517860 
Starting epoch 40
Epoch 40 loading complete
l0: 0.796274, l1: 0.794611, l2: 0.796656, l3: 0.801926, l4: 0.819691, l5: 0.933888, l6: 1.189201

[epoch:  40/100, batch:     2/  792, ite: 53125] train loss: 4.952872, tar: 0.518107 
l0: 0.645476, l1: 0.650648, l2: 0.647669, l3: 0.650975, l4: 0.675729, l5: 0.750955, l6: 0.906217

[epoch:  40/100, batch:     4/  792, ite: 53126] train loss: 4.953801, tar: 0.518220 
l0: 0.446135, l1: 0.453718, l2: 0.453134, l3: 0.461747, l4: 0.460121, l5: 0.562509, l6: 0.737712

[epoch:  40/100, batch:     6/  792,

l0: 0.534133, l1: 0.550337, l2: 0.558711, l3: 0.567041, l4: 0.614009, l5: 0.673170, l6: 1.095852

[epoch:  40/100, batch:    86/  792, ite: 53167] train loss: 4.935493, tar: 0.515786 
l0: 0.445920, l1: 0.446295, l2: 0.446456, l3: 0.440039, l4: 0.450528, l5: 0.546164, l6: 0.617268

[epoch:  40/100, batch:    88/  792, ite: 53168] train loss: 4.934811, tar: 0.515726 
l0: 0.441916, l1: 0.441217, l2: 0.443850, l3: 0.449975, l4: 0.489027, l5: 0.586652, l6: 0.818787

[epoch:  40/100, batch:    90/  792, ite: 53169] train loss: 4.934382, tar: 0.515663 
l0: 0.820302, l1: 0.831731, l2: 0.823994, l3: 0.830252, l4: 0.839857, l5: 0.965318, l6: 1.000761

[epoch:  40/100, batch:    92/  792, ite: 53170] train loss: 4.936283, tar: 0.515923 
l0: 0.391283, l1: 0.395858, l2: 0.397797, l3: 0.395143, l4: 0.410009, l5: 0.488466, l6: 0.653954

[epoch:  40/100, batch:    94/  792, ite: 53171] train loss: 4.935328, tar: 0.515817 
l0: 0.246463, l1: 0.246328, l2: 0.247545, l3: 0.257328, l4: 0.280016, l5: 0.3351

[epoch:  40/100, batch:   174/  792, ite: 53211] train loss: 4.934066, tar: 0.515391 
l0: 0.408515, l1: 0.410427, l2: 0.409583, l3: 0.407857, l4: 0.432061, l5: 0.582459, l6: 0.747647

[epoch:  40/100, batch:   176/  792, ite: 53212] train loss: 4.933500, tar: 0.515303 
l0: 0.395796, l1: 0.390530, l2: 0.393541, l3: 0.403451, l4: 0.413389, l5: 0.447279, l6: 0.588504

[epoch:  40/100, batch:   178/  792, ite: 53213] train loss: 4.932566, tar: 0.515205 
l0: 0.583565, l1: 0.589799, l2: 0.589727, l3: 0.590759, l4: 0.650859, l5: 0.868256, l6: 0.962112

[epoch:  40/100, batch:   180/  792, ite: 53214] train loss: 4.933290, tar: 0.515261 
l0: 0.557795, l1: 0.566636, l2: 0.565823, l3: 0.565441, l4: 0.577178, l5: 0.634431, l6: 0.976741

[epoch:  40/100, batch:   182/  792, ite: 53215] train loss: 4.933756, tar: 0.515296 
l0: 0.677623, l1: 0.676247, l2: 0.681410, l3: 0.684257, l4: 0.693276, l5: 0.727705, l6: 0.769860

[epoch:  40/100, batch:   184/  792, ite: 53216] train loss: 4.934451, tar: 0.51

[epoch:  40/100, batch:   264/  792, ite: 53256] train loss: 4.929255, tar: 0.514769 
l0: 0.472287, l1: 0.476245, l2: 0.475164, l3: 0.471271, l4: 0.499345, l5: 0.626273, l6: 0.933749

[epoch:  40/100, batch:   266/  792, ite: 53257] train loss: 4.929209, tar: 0.514735 
l0: 0.519349, l1: 0.528147, l2: 0.521405, l3: 0.512766, l4: 0.526459, l5: 0.669616, l6: 0.875236

[epoch:  40/100, batch:   268/  792, ite: 53258] train loss: 4.929309, tar: 0.514739 
l0: 0.637623, l1: 0.640524, l2: 0.640233, l3: 0.637213, l4: 0.658047, l5: 0.767556, l6: 1.029264

[epoch:  40/100, batch:   270/  792, ite: 53259] train loss: 4.930286, tar: 0.514836 
l0: 0.364073, l1: 0.366645, l2: 0.366114, l3: 0.367531, l4: 0.422526, l5: 0.479259, l6: 0.768819

[epoch:  40/100, batch:   272/  792, ite: 53260] train loss: 4.929580, tar: 0.514716 
l0: 0.981296, l1: 0.992785, l2: 0.993505, l3: 0.998144, l4: 1.013101, l5: 1.034961, l6: 1.485964

[epoch:  40/100, batch:   274/  792, ite: 53261] train loss: 4.932910, tar: 0.51

[epoch:  40/100, batch:   354/  792, ite: 53301] train loss: 4.940690, tar: 0.515945 
l0: 0.331975, l1: 0.333865, l2: 0.335543, l3: 0.339758, l4: 0.407700, l5: 0.654086, l6: 0.763371

[epoch:  40/100, batch:   356/  792, ite: 53302] train loss: 4.939914, tar: 0.515804 
l0: 0.290043, l1: 0.289104, l2: 0.291153, l3: 0.288390, l4: 0.299850, l5: 0.491773, l6: 0.584953

[epoch:  40/100, batch:   358/  792, ite: 53303] train loss: 4.938553, tar: 0.515631 
l0: 0.604862, l1: 0.603785, l2: 0.606681, l3: 0.609442, l4: 0.641414, l5: 0.762730, l6: 0.915871

[epoch:  40/100, batch:   360/  792, ite: 53304] train loss: 4.939132, tar: 0.515699 
l0: 0.514664, l1: 0.521519, l2: 0.518652, l3: 0.522296, l4: 0.522329, l5: 0.498513, l6: 0.554685

[epoch:  40/100, batch:   362/  792, ite: 53305] train loss: 4.938639, tar: 0.515698 
l0: 0.318259, l1: 0.320301, l2: 0.322182, l3: 0.319575, l4: 0.328773, l5: 0.457739, l6: 0.576324

[epoch:  40/100, batch:   364/  792, ite: 53306] train loss: 4.937395, tar: 0.51

[epoch:  40/100, batch:   444/  792, ite: 53346] train loss: 4.930276, tar: 0.514524 
l0: 0.490688, l1: 0.492503, l2: 0.493054, l3: 0.493167, l4: 0.493671, l5: 0.540386, l6: 0.681329

[epoch:  40/100, batch:   446/  792, ite: 53347] train loss: 4.929939, tar: 0.514506 
l0: 0.494650, l1: 0.495425, l2: 0.490906, l3: 0.510027, l4: 0.567826, l5: 0.610567, l6: 0.763844

[epoch:  40/100, batch:   448/  792, ite: 53348] train loss: 4.929812, tar: 0.514492 
l0: 0.221628, l1: 0.224927, l2: 0.226613, l3: 0.222437, l4: 0.260958, l5: 0.439978, l6: 0.580967

[epoch:  40/100, batch:   450/  792, ite: 53349] train loss: 4.928187, tar: 0.514274 
l0: 0.681397, l1: 0.681596, l2: 0.683824, l3: 0.683456, l4: 0.696107, l5: 0.783978, l6: 0.998525

[epoch:  40/100, batch:   452/  792, ite: 53350] train loss: 4.929172, tar: 0.514398 
l0: 1.034637, l1: 1.060239, l2: 1.049189, l3: 1.047963, l4: 1.031979, l5: 1.090218, l6: 1.046895

[epoch:  40/100, batch:   454/  792, ite: 53351] train loss: 4.931726, tar: 0.51

[epoch:  40/100, batch:   534/  792, ite: 53391] train loss: 4.926746, tar: 0.514270 
l0: 0.246903, l1: 0.252754, l2: 0.250299, l3: 0.256936, l4: 0.277205, l5: 0.386231, l6: 0.485950

[epoch:  40/100, batch:   536/  792, ite: 53392] train loss: 4.925124, tar: 0.514077 
l0: 0.486225, l1: 0.487409, l2: 0.488697, l3: 0.478212, l4: 0.484091, l5: 0.580379, l6: 0.754570

[epoch:  40/100, batch:   538/  792, ite: 53393] train loss: 4.924851, tar: 0.514057 
l0: 0.546694, l1: 0.554480, l2: 0.557484, l3: 0.575908, l4: 0.602569, l5: 0.604780, l6: 0.728481

[epoch:  40/100, batch:   540/  792, ite: 53394] train loss: 4.924953, tar: 0.514081 
l0: 0.423079, l1: 0.427040, l2: 0.430382, l3: 0.433766, l4: 0.451800, l5: 0.478391, l6: 0.547225

[epoch:  40/100, batch:   542/  792, ite: 53395] train loss: 4.924165, tar: 0.514016 
l0: 0.781521, l1: 0.806479, l2: 0.804983, l3: 0.802522, l4: 0.842257, l5: 0.881298, l6: 1.120355

[epoch:  40/100, batch:   544/  792, ite: 53396] train loss: 4.925845, tar: 0.51

[epoch:  40/100, batch:   624/  792, ite: 53436] train loss: 4.929351, tar: 0.514642 
l0: 0.763405, l1: 0.762951, l2: 0.767661, l3: 0.768303, l4: 0.785151, l5: 0.806861, l6: 0.912874

[epoch:  40/100, batch:   626/  792, ite: 53437] train loss: 4.930518, tar: 0.514815 
l0: 0.413852, l1: 0.417137, l2: 0.414208, l3: 0.419432, l4: 0.446657, l5: 0.502085, l6: 0.868076

[epoch:  40/100, batch:   628/  792, ite: 53438] train loss: 4.930037, tar: 0.514745 
l0: 0.317492, l1: 0.322255, l2: 0.315015, l3: 0.317247, l4: 0.345694, l5: 0.496821, l6: 0.645830

[epoch:  40/100, batch:   630/  792, ite: 53439] train loss: 4.929098, tar: 0.514608 
l0: 0.333002, l1: 0.335389, l2: 0.336524, l3: 0.343015, l4: 0.370817, l5: 0.448126, l6: 0.679252

[epoch:  40/100, batch:   632/  792, ite: 53440] train loss: 4.928127, tar: 0.514482 
l0: 0.820270, l1: 0.817690, l2: 0.828402, l3: 0.853375, l4: 0.918404, l5: 0.915516, l6: 1.023140

[epoch:  40/100, batch:   634/  792, ite: 53441] train loss: 4.929711, tar: 0.51

[epoch:  40/100, batch:   714/  792, ite: 53481] train loss: 4.920016, tar: 0.513693 
l0: 0.768831, l1: 0.776224, l2: 0.780595, l3: 0.793242, l4: 0.794799, l5: 0.862250, l6: 0.911457

[epoch:  40/100, batch:   716/  792, ite: 53482] train loss: 4.921204, tar: 0.513866 
l0: 0.610193, l1: 0.610953, l2: 0.608390, l3: 0.603361, l4: 0.632412, l5: 0.770848, l6: 0.889458

[epoch:  40/100, batch:   718/  792, ite: 53483] train loss: 4.921696, tar: 0.513930 
l0: 0.462442, l1: 0.461096, l2: 0.461256, l3: 0.459065, l4: 0.529679, l5: 0.597501, l6: 0.801956

[epoch:  40/100, batch:   720/  792, ite: 53484] train loss: 4.921452, tar: 0.513896 
l0: 0.595134, l1: 0.608093, l2: 0.607059, l3: 0.606958, l4: 0.630800, l5: 0.714605, l6: 0.788717

[epoch:  40/100, batch:   722/  792, ite: 53485] train loss: 4.921732, tar: 0.513950 
l0: 0.577459, l1: 0.586402, l2: 0.590387, l3: 0.602526, l4: 0.600822, l5: 0.711913, l6: 0.959535

[epoch:  40/100, batch:   724/  792, ite: 53486] train loss: 4.922321, tar: 0.51

l0: 0.420336, l1: 0.415472, l2: 0.415670, l3: 0.424487, l4: 0.435029, l5: 0.538553, l6: 0.648821

[epoch:  41/100, batch:    12/  792, ite: 53526] train loss: 4.929419, tar: 0.515297 
l0: 0.473882, l1: 0.472087, l2: 0.468850, l3: 0.461707, l4: 0.481108, l5: 0.598117, l6: 0.690105

[epoch:  41/100, batch:    14/  792, ite: 53527] train loss: 4.929064, tar: 0.515270 
l0: 0.426720, l1: 0.431829, l2: 0.426104, l3: 0.423396, l4: 0.460235, l5: 0.543664, l6: 0.698084

[epoch:  41/100, batch:    16/  792, ite: 53528] train loss: 4.928640, tar: 0.515212 
l0: 0.244761, l1: 0.250808, l2: 0.249498, l3: 0.256461, l4: 0.331468, l5: 0.461802, l6: 0.582811

[epoch:  41/100, batch:    18/  792, ite: 53529] train loss: 4.927369, tar: 0.515035 
l0: 0.681971, l1: 0.683832, l2: 0.685745, l3: 0.678911, l4: 0.695503, l5: 0.747712, l6: 0.832710

[epoch:  41/100, batch:    20/  792, ite: 53530] train loss: 4.928098, tar: 0.515144 
l0: 0.662093, l1: 0.673963, l2: 0.668020, l3: 0.661319, l4: 0.694378, l5: 0.7853

[epoch:  41/100, batch:   100/  792, ite: 53570] train loss: 4.927066, tar: 0.515244 
l0: 0.594187, l1: 0.597900, l2: 0.603518, l3: 0.613653, l4: 0.646346, l5: 0.812464, l6: 0.977602

[epoch:  41/100, batch:   102/  792, ite: 53571] train loss: 4.927745, tar: 0.515294 
l0: 0.528135, l1: 0.529834, l2: 0.530825, l3: 0.532668, l4: 0.537955, l5: 0.584695, l6: 0.710478

[epoch:  41/100, batch:   104/  792, ite: 53572] train loss: 4.927614, tar: 0.515302 
l0: 0.520234, l1: 0.526330, l2: 0.530429, l3: 0.531579, l4: 0.621560, l5: 0.734628, l6: 0.953073

[epoch:  41/100, batch:   106/  792, ite: 53573] train loss: 4.927942, tar: 0.515306 
l0: 0.641258, l1: 0.644846, l2: 0.647060, l3: 0.645137, l4: 0.661235, l5: 0.704685, l6: 0.924465

[epoch:  41/100, batch:   108/  792, ite: 53574] train loss: 4.928588, tar: 0.515386 
l0: 0.309911, l1: 0.309050, l2: 0.312493, l3: 0.321588, l4: 0.351193, l5: 0.446961, l6: 0.616537

[epoch:  41/100, batch:   110/  792, ite: 53575] train loss: 4.927500, tar: 0.51

[epoch:  41/100, batch:   190/  792, ite: 53615] train loss: 4.926929, tar: 0.515076 
l0: 0.546980, l1: 0.538046, l2: 0.541493, l3: 0.533735, l4: 0.532142, l5: 0.595679, l6: 0.663425

[epoch:  41/100, batch:   192/  792, ite: 53616] train loss: 4.926867, tar: 0.515096 
l0: 0.343457, l1: 0.350518, l2: 0.354073, l3: 0.358005, l4: 0.365961, l5: 0.491767, l6: 0.610161

[epoch:  41/100, batch:   194/  792, ite: 53617] train loss: 4.926002, tar: 0.514990 
l0: 0.754637, l1: 0.755075, l2: 0.753765, l3: 0.747551, l4: 0.752762, l5: 0.795981, l6: 0.912955

[epoch:  41/100, batch:   196/  792, ite: 53618] train loss: 4.926886, tar: 0.515138 
l0: 0.358131, l1: 0.357775, l2: 0.356442, l3: 0.362992, l4: 0.385906, l5: 0.406927, l6: 0.550822

[epoch:  41/100, batch:   198/  792, ite: 53619] train loss: 4.925975, tar: 0.515041 
l0: 0.494838, l1: 0.501300, l2: 0.507037, l3: 0.513197, l4: 0.532236, l5: 0.556909, l6: 0.761539

[epoch:  41/100, batch:   200/  792, ite: 53620] train loss: 4.926006, tar: 0.51

[epoch:  41/100, batch:   280/  792, ite: 53660] train loss: 4.928543, tar: 0.515089 
l0: 0.627802, l1: 0.636611, l2: 0.638952, l3: 0.651003, l4: 0.658196, l5: 0.774387, l6: 0.764611

[epoch:  41/100, batch:   282/  792, ite: 53661] train loss: 4.928927, tar: 0.515157 
l0: 0.463963, l1: 0.470486, l2: 0.470752, l3: 0.473383, l4: 0.514250, l5: 0.572334, l6: 0.711078

[epoch:  41/100, batch:   284/  792, ite: 53662] train loss: 4.928628, tar: 0.515126 
l0: 0.612056, l1: 0.614201, l2: 0.614794, l3: 0.617091, l4: 0.610100, l5: 0.742180, l6: 1.016660

[epoch:  41/100, batch:   286/  792, ite: 53663] train loss: 4.929175, tar: 0.515184 
l0: 0.584686, l1: 0.588419, l2: 0.588172, l3: 0.591469, l4: 0.648229, l5: 0.736992, l6: 0.908664

[epoch:  41/100, batch:   288/  792, ite: 53664] train loss: 4.929561, tar: 0.515226 
l0: 0.526288, l1: 0.521118, l2: 0.522301, l3: 0.525826, l4: 0.583421, l5: 0.580200, l6: 0.719038

[epoch:  41/100, batch:   290/  792, ite: 53665] train loss: 4.929459, tar: 0.51

[epoch:  41/100, batch:   370/  792, ite: 53705] train loss: 4.936732, tar: 0.516011 
l0: 0.270289, l1: 0.276753, l2: 0.276568, l3: 0.283809, l4: 0.334136, l5: 0.433877, l6: 0.588374

[epoch:  41/100, batch:   372/  792, ite: 53706] train loss: 4.935719, tar: 0.515867 
l0: 0.546749, l1: 0.542629, l2: 0.548107, l3: 0.551571, l4: 0.586028, l5: 0.682221, l6: 0.871298

[epoch:  41/100, batch:   374/  792, ite: 53707] train loss: 4.936080, tar: 0.515885 
l0: 0.435019, l1: 0.436205, l2: 0.437326, l3: 0.446376, l4: 0.475370, l5: 0.623014, l6: 0.909450

[epoch:  41/100, batch:   376/  792, ite: 53708] train loss: 4.935907, tar: 0.515838 
l0: 0.373800, l1: 0.377249, l2: 0.378621, l3: 0.382930, l4: 0.405045, l5: 0.556684, l6: 0.795023

[epoch:  41/100, batch:   378/  792, ite: 53709] train loss: 4.935427, tar: 0.515755 
l0: 0.386265, l1: 0.384785, l2: 0.384738, l3: 0.390161, l4: 0.437025, l5: 0.490253, l6: 0.671985

[epoch:  41/100, batch:   380/  792, ite: 53710] train loss: 4.934828, tar: 0.51

[epoch:  41/100, batch:   460/  792, ite: 53750] train loss: 4.935607, tar: 0.515897 
l0: 0.433093, l1: 0.436192, l2: 0.435541, l3: 0.443781, l4: 0.478530, l5: 0.747925, l6: 0.887583

[epoch:  41/100, batch:   462/  792, ite: 53751] train loss: 4.935604, tar: 0.515849 
l0: 0.596772, l1: 0.601291, l2: 0.602285, l3: 0.605074, l4: 0.615459, l5: 0.697944, l6: 0.896069

[epoch:  41/100, batch:   464/  792, ite: 53752] train loss: 4.936010, tar: 0.515896 
l0: 0.366013, l1: 0.372509, l2: 0.367058, l3: 0.368268, l4: 0.400468, l5: 0.643566, l6: 0.807984

[epoch:  41/100, batch:   466/  792, ite: 53753] train loss: 4.935590, tar: 0.515810 
l0: 0.984782, l1: 0.985057, l2: 0.990177, l3: 0.998554, l4: 1.005428, l5: 1.068074, l6: 1.058067

[epoch:  41/100, batch:   468/  792, ite: 53754] train loss: 4.937576, tar: 0.516078 
l0: 0.439911, l1: 0.448587, l2: 0.443087, l3: 0.445688, l4: 0.490551, l5: 0.571113, l6: 0.667953

[epoch:  41/100, batch:   470/  792, ite: 53755] train loss: 4.937160, tar: 0.51

[epoch:  41/100, batch:   550/  792, ite: 53795] train loss: 4.934237, tar: 0.515598 
l0: 0.301947, l1: 0.300698, l2: 0.300926, l3: 0.304913, l4: 0.332990, l5: 0.425574, l6: 0.485667

[epoch:  41/100, batch:   552/  792, ite: 53796] train loss: 4.933156, tar: 0.515479 
l0: 0.561017, l1: 0.566179, l2: 0.565382, l3: 0.559696, l4: 0.579504, l5: 0.714119, l6: 0.872860

[epoch:  41/100, batch:   554/  792, ite: 53797] train loss: 4.933367, tar: 0.515504 
l0: 0.600790, l1: 0.606589, l2: 0.606138, l3: 0.618163, l4: 0.601707, l5: 0.687132, l6: 0.749222

[epoch:  41/100, batch:   556/  792, ite: 53798] train loss: 4.933599, tar: 0.515552 
l0: 0.501570, l1: 0.499841, l2: 0.502475, l3: 0.507233, l4: 0.510848, l5: 0.658563, l6: 0.797916

[epoch:  41/100, batch:   558/  792, ite: 53799] train loss: 4.933518, tar: 0.515544 
l0: 0.845607, l1: 0.840213, l2: 0.834577, l3: 0.848990, l4: 0.883340, l5: 0.970944, l6: 1.124320

[epoch:  41/100, batch:   560/  792, ite: 53800] train loss: 4.935016, tar: 0.51

[epoch:  41/100, batch:   640/  792, ite: 53840] train loss: 4.928982, tar: 0.514897 
l0: 0.725743, l1: 0.754028, l2: 0.754153, l3: 0.745761, l4: 0.765013, l5: 0.903247, l6: 1.272274

[epoch:  41/100, batch:   642/  792, ite: 53841] train loss: 4.930292, tar: 0.515011 
l0: 0.497360, l1: 0.503677, l2: 0.502813, l3: 0.513300, l4: 0.526924, l5: 0.636537, l6: 0.806146

[epoch:  41/100, batch:   644/  792, ite: 53842] train loss: 4.930257, tar: 0.515002 
l0: 0.254737, l1: 0.255882, l2: 0.254659, l3: 0.264891, l4: 0.294063, l5: 0.450213, l6: 0.561278

[epoch:  41/100, batch:   646/  792, ite: 53843] train loss: 4.929149, tar: 0.514861 
l0: 0.525787, l1: 0.525502, l2: 0.524217, l3: 0.523771, l4: 0.546619, l5: 0.648218, l6: 0.724404

[epoch:  41/100, batch:   648/  792, ite: 53844] train loss: 4.929082, tar: 0.514866 
l0: 0.472564, l1: 0.477379, l2: 0.478316, l3: 0.472534, l4: 0.497495, l5: 0.616821, l6: 0.847279

[epoch:  41/100, batch:   650/  792, ite: 53845] train loss: 4.929003, tar: 0.51

[epoch:  41/100, batch:   730/  792, ite: 53885] train loss: 4.925670, tar: 0.514174 
l0: 0.568248, l1: 0.573961, l2: 0.573147, l3: 0.572559, l4: 0.603344, l5: 0.753253, l6: 0.989749

[epoch:  41/100, batch:   732/  792, ite: 53886] train loss: 4.926062, tar: 0.514203 
l0: 0.437777, l1: 0.442984, l2: 0.443029, l3: 0.454353, l4: 0.471255, l5: 0.520540, l6: 0.601957

[epoch:  41/100, batch:   734/  792, ite: 53887] train loss: 4.925590, tar: 0.514162 
l0: 0.624934, l1: 0.618041, l2: 0.618726, l3: 0.628894, l4: 0.655299, l5: 0.743471, l6: 0.807505

[epoch:  41/100, batch:   736/  792, ite: 53888] train loss: 4.925916, tar: 0.514221 
l0: 0.200932, l1: 0.208714, l2: 0.210330, l3: 0.210502, l4: 0.252007, l5: 0.440939, l6: 0.674205

[epoch:  41/100, batch:   738/  792, ite: 53889] train loss: 4.924863, tar: 0.514055 
l0: 0.762789, l1: 0.765102, l2: 0.767742, l3: 0.776693, l4: 0.806604, l5: 0.832136, l6: 0.968720

[epoch:  41/100, batch:   740/  792, ite: 53890] train loss: 4.925820, tar: 0.51

l0: 0.569687, l1: 0.567612, l2: 0.564848, l3: 0.563036, l4: 0.567332, l5: 0.644948, l6: 0.762090

[epoch:  42/100, batch:    28/  792, ite: 53930] train loss: 4.919184, tar: 0.513513 
l0: 0.403135, l1: 0.409467, l2: 0.408407, l3: 0.397862, l4: 0.397989, l5: 0.549936, l6: 0.726463

[epoch:  42/100, batch:    30/  792, ite: 53931] train loss: 4.918780, tar: 0.513456 
l0: 0.529143, l1: 0.528637, l2: 0.525329, l3: 0.526170, l4: 0.555975, l5: 0.539261, l6: 0.671671

[epoch:  42/100, batch:    32/  792, ite: 53932] train loss: 4.918620, tar: 0.513464 
l0: 0.704679, l1: 0.690768, l2: 0.683988, l3: 0.693802, l4: 0.779178, l5: 0.934344, l6: 1.157049

[epoch:  42/100, batch:    34/  792, ite: 53933] train loss: 4.919631, tar: 0.513563 
l0: 0.917791, l1: 0.929009, l2: 0.928877, l3: 0.924456, l4: 0.915643, l5: 0.930836, l6: 1.002089

[epoch:  42/100, batch:    36/  792, ite: 53934] train loss: 4.921046, tar: 0.513772 
l0: 0.552362, l1: 0.552868, l2: 0.554971, l3: 0.557230, l4: 0.583764, l5: 0.6768

[epoch:  42/100, batch:   116/  792, ite: 53974] train loss: 4.917883, tar: 0.513408 
l0: 0.773046, l1: 0.767727, l2: 0.770780, l3: 0.777177, l4: 0.795601, l5: 0.861532, l6: 0.906718

[epoch:  42/100, batch:   118/  792, ite: 53975] train loss: 4.918762, tar: 0.513540 
l0: 0.410370, l1: 0.418644, l2: 0.419513, l3: 0.419291, l4: 0.428872, l5: 0.534039, l6: 0.603160

[epoch:  42/100, batch:   120/  792, ite: 53976] train loss: 4.918247, tar: 0.513488 
l0: 0.664593, l1: 0.667756, l2: 0.666908, l3: 0.673045, l4: 0.698944, l5: 0.737491, l6: 0.874627

[epoch:  42/100, batch:   122/  792, ite: 53977] train loss: 4.918749, tar: 0.513564 
l0: 0.514210, l1: 0.518032, l2: 0.517141, l3: 0.523322, l4: 0.548612, l5: 0.674491, l6: 0.976847

[epoch:  42/100, batch:   124/  792, ite: 53978] train loss: 4.918944, tar: 0.513564 
l0: 0.276192, l1: 0.281604, l2: 0.279051, l3: 0.282080, l4: 0.293493, l5: 0.436024, l6: 0.676072

[epoch:  42/100, batch:   126/  792, ite: 53979] train loss: 4.918118, tar: 0.51

[epoch:  42/100, batch:   206/  792, ite: 54019] train loss: 4.436517, tar: 0.452443 
l0: 0.419363, l1: 0.418203, l2: 0.418241, l3: 0.436354, l4: 0.462128, l5: 0.551852, l6: 0.646113

[epoch:  42/100, batch:   208/  792, ite: 54020] train loss: 4.420320, tar: 0.450789 
l0: 0.325931, l1: 0.329041, l2: 0.328040, l3: 0.326713, l4: 0.350723, l5: 0.506608, l6: 0.699084

[epoch:  42/100, batch:   210/  792, ite: 54021] train loss: 4.383029, tar: 0.444844 
l0: 0.305298, l1: 0.305282, l2: 0.310001, l3: 0.322109, l4: 0.344191, l5: 0.560903, l6: 0.649843

[epoch:  42/100, batch:   212/  792, ite: 54022] train loss: 4.345247, tar: 0.438501 
l0: 0.960451, l1: 0.976766, l2: 0.949311, l3: 0.943619, l4: 0.989421, l5: 0.991430, l6: 1.102765

[epoch:  42/100, batch:   214/  792, ite: 54023] train loss: 4.507196, tar: 0.461194 
l0: 0.441378, l1: 0.443745, l2: 0.449849, l3: 0.455494, l4: 0.472468, l5: 0.563402, l6: 0.693362

[epoch:  42/100, batch:   216/  792, ite: 54024] train loss: 4.500768, tar: 0.46

[epoch:  42/100, batch:   296/  792, ite: 54064] train loss: 4.698491, tar: 0.481632 
l0: 0.496256, l1: 0.498738, l2: 0.506871, l3: 0.506028, l4: 0.522178, l5: 0.678479, l6: 0.934740

[epoch:  42/100, batch:   298/  792, ite: 54065] train loss: 4.704097, tar: 0.481857 
l0: 0.389748, l1: 0.398506, l2: 0.395566, l3: 0.399658, l4: 0.420339, l5: 0.498805, l6: 0.714782

[epoch:  42/100, batch:   300/  792, ite: 54066] train loss: 4.693692, tar: 0.480462 
l0: 0.646945, l1: 0.651915, l2: 0.653648, l3: 0.648673, l4: 0.680539, l5: 0.759143, l6: 0.996493

[epoch:  42/100, batch:   302/  792, ite: 54067] train loss: 4.713802, tar: 0.482947 
l0: 0.310251, l1: 0.310425, l2: 0.309521, l3: 0.313060, l4: 0.343041, l5: 0.440467, l6: 0.566347

[epoch:  42/100, batch:   304/  792, ite: 54068] train loss: 4.690534, tar: 0.480407 
l0: 0.515196, l1: 0.515225, l2: 0.516362, l3: 0.513795, l4: 0.526134, l5: 0.594210, l6: 0.670937

[epoch:  42/100, batch:   306/  792, ite: 54069] train loss: 4.688431, tar: 0.48

[epoch:  42/100, batch:   386/  792, ite: 54109] train loss: 4.950092, tar: 0.518115 
l0: 0.440347, l1: 0.444136, l2: 0.444307, l3: 0.434280, l4: 0.432225, l5: 0.488158, l6: 0.554408

[epoch:  42/100, batch:   388/  792, ite: 54110] train loss: 4.940902, tar: 0.517408 
l0: 0.236943, l1: 0.238288, l2: 0.241554, l3: 0.243722, l4: 0.263403, l5: 0.421836, l6: 0.580065

[epoch:  42/100, batch:   390/  792, ite: 54111] train loss: 4.922197, tar: 0.514881 
l0: 0.302773, l1: 0.305215, l2: 0.309896, l3: 0.321768, l4: 0.329558, l5: 0.450304, l6: 0.548840

[epoch:  42/100, batch:   392/  792, ite: 54112] train loss: 4.906358, tar: 0.512988 
l0: 0.309266, l1: 0.313901, l2: 0.312626, l3: 0.311458, l4: 0.338086, l5: 0.429458, l6: 0.532001

[epoch:  42/100, batch:   394/  792, ite: 54113] train loss: 4.890337, tar: 0.511185 
l0: 0.523674, l1: 0.524203, l2: 0.525421, l3: 0.540204, l4: 0.596891, l5: 0.675314, l6: 0.882998

[epoch:  42/100, batch:   396/  792, ite: 54114] train loss: 4.892971, tar: 0.51

[epoch:  42/100, batch:   476/  792, ite: 54154] train loss: 4.938348, tar: 0.516770 
l0: 0.414904, l1: 0.403371, l2: 0.396626, l3: 0.384451, l4: 0.412955, l5: 0.383818, l6: 0.429342

[epoch:  42/100, batch:   478/  792, ite: 54155] train loss: 4.928289, tar: 0.516113 
l0: 0.489414, l1: 0.486052, l2: 0.485827, l3: 0.494629, l4: 0.527009, l5: 0.611300, l6: 0.884889

[epoch:  42/100, batch:   480/  792, ite: 54156] train loss: 4.927987, tar: 0.515941 
l0: 0.386134, l1: 0.390253, l2: 0.389816, l3: 0.389697, l4: 0.413066, l5: 0.483473, l6: 0.583626

[epoch:  42/100, batch:   482/  792, ite: 54157] train loss: 4.920099, tar: 0.515115 
l0: 0.336124, l1: 0.345196, l2: 0.349711, l3: 0.366951, l4: 0.382153, l5: 0.479984, l6: 0.691373

[epoch:  42/100, batch:   484/  792, ite: 54158] train loss: 4.912343, tar: 0.513982 
l0: 0.330036, l1: 0.332534, l2: 0.331916, l3: 0.330908, l4: 0.355700, l5: 0.403908, l6: 0.472721

[epoch:  42/100, batch:   486/  792, ite: 54159] train loss: 4.900597, tar: 0.51

[epoch:  42/100, batch:   566/  792, ite: 54199] train loss: 4.901232, tar: 0.512186 
l0: 0.445846, l1: 0.445679, l2: 0.443083, l3: 0.440118, l4: 0.481274, l5: 0.628455, l6: 0.786394

[epoch:  42/100, batch:   568/  792, ite: 54200] train loss: 4.899404, tar: 0.511854 
l0: 0.319707, l1: 0.322307, l2: 0.318542, l3: 0.321765, l4: 0.353951, l5: 0.437853, l6: 0.522538

[epoch:  42/100, batch:   570/  792, ite: 54201] train loss: 4.890900, tar: 0.510899 
l0: 0.437977, l1: 0.441391, l2: 0.443022, l3: 0.456668, l4: 0.489648, l5: 0.621817, l6: 0.837499

[epoch:  42/100, batch:   572/  792, ite: 54202] train loss: 4.889881, tar: 0.510538 
l0: 0.490559, l1: 0.491196, l2: 0.491901, l3: 0.502931, l4: 0.505500, l5: 0.709760, l6: 0.856937

[epoch:  42/100, batch:   574/  792, ite: 54203] train loss: 4.890035, tar: 0.510439 
l0: 1.059551, l1: 1.079051, l2: 1.076419, l3: 1.060572, l4: 1.060401, l5: 1.211545, l6: 1.340873

[epoch:  42/100, batch:   576/  792, ite: 54204] train loss: 4.911202, tar: 0.51

[epoch:  42/100, batch:   656/  792, ite: 54244] train loss: 4.908163, tar: 0.511768 
l0: 0.313320, l1: 0.316210, l2: 0.320545, l3: 0.337428, l4: 0.383694, l5: 0.506641, l6: 0.691651

[epoch:  42/100, batch:   658/  792, ite: 54245] train loss: 4.902932, tar: 0.510958 
l0: 0.503033, l1: 0.508579, l2: 0.506294, l3: 0.512050, l4: 0.539531, l5: 0.593987, l6: 0.749633

[epoch:  42/100, batch:   660/  792, ite: 54246] train loss: 4.902596, tar: 0.510926 
l0: 0.427045, l1: 0.431721, l2: 0.430891, l3: 0.436414, l4: 0.453415, l5: 0.519545, l6: 0.715338

[epoch:  42/100, batch:   662/  792, ite: 54247] train loss: 4.900196, tar: 0.510587 
l0: 1.007867, l1: 1.040315, l2: 1.033787, l3: 1.026257, l4: 1.034393, l5: 1.086042, l6: 1.113280

[epoch:  42/100, batch:   664/  792, ite: 54248] train loss: 4.914680, tar: 0.512592 
l0: 0.393535, l1: 0.396169, l2: 0.390353, l3: 0.384057, l4: 0.389648, l5: 0.404606, l6: 0.539027

[epoch:  42/100, batch:   666/  792, ite: 54249] train loss: 4.909306, tar: 0.51

[epoch:  42/100, batch:   746/  792, ite: 54289] train loss: 4.889667, tar: 0.510338 
l0: 0.628891, l1: 0.621363, l2: 0.625506, l3: 0.624554, l4: 0.626151, l5: 0.766190, l6: 0.985387

[epoch:  42/100, batch:   748/  792, ite: 54290] train loss: 4.892868, tar: 0.510746 
l0: 0.324901, l1: 0.326682, l2: 0.330155, l3: 0.328790, l4: 0.384854, l5: 0.440152, l6: 0.646885

[epoch:  42/100, batch:   750/  792, ite: 54291] train loss: 4.888269, tar: 0.510108 
l0: 0.666580, l1: 0.669639, l2: 0.671165, l3: 0.672042, l4: 0.695089, l5: 0.706514, l6: 0.901163

[epoch:  42/100, batch:   752/  792, ite: 54292] train loss: 4.892551, tar: 0.510644 
l0: 0.382669, l1: 0.386152, l2: 0.380757, l3: 0.379822, l4: 0.398527, l5: 0.500858, l6: 0.670897

[epoch:  42/100, batch:   754/  792, ite: 54293] train loss: 4.889036, tar: 0.510207 
l0: 0.475000, l1: 0.484117, l2: 0.483333, l3: 0.484721, l4: 0.474650, l5: 0.612085, l6: 0.708653

[epoch:  42/100, batch:   756/  792, ite: 54294] train loss: 4.887694, tar: 0.51

l0: 0.379988, l1: 0.377703, l2: 0.376201, l3: 0.382204, l4: 0.405564, l5: 0.497495, l6: 0.635976

[epoch:  43/100, batch:    44/  792, ite: 54334] train loss: 4.922484, tar: 0.514010 
l0: 0.527138, l1: 0.523764, l2: 0.526985, l3: 0.537382, l4: 0.550357, l5: 0.633873, l6: 1.040729

[epoch:  43/100, batch:    46/  792, ite: 54335] train loss: 4.923486, tar: 0.514049 
l0: 0.222335, l1: 0.234020, l2: 0.238015, l3: 0.247106, l4: 0.292694, l5: 0.420709, l6: 0.621248

[epoch:  43/100, batch:    48/  792, ite: 54336] train loss: 4.917591, tar: 0.513181 
l0: 0.372120, l1: 0.375267, l2: 0.377390, l3: 0.380037, l4: 0.408952, l5: 0.495191, l6: 0.646880

[epoch:  43/100, batch:    50/  792, ite: 54337] train loss: 4.914291, tar: 0.512762 
l0: 0.447080, l1: 0.442187, l2: 0.441241, l3: 0.443609, l4: 0.473946, l5: 0.570543, l6: 0.663112

[epoch:  43/100, batch:    52/  792, ite: 54338] train loss: 4.912210, tar: 0.512568 
l0: 1.816689, l1: 1.820143, l2: 1.842056, l3: 1.877530, l4: 1.895733, l5: 1.9685

[epoch:  43/100, batch:   132/  792, ite: 54378] train loss: 4.934234, tar: 0.515030 
l0: 0.355205, l1: 0.359985, l2: 0.359261, l3: 0.379387, l4: 0.413389, l5: 0.516233, l6: 0.909939

[epoch:  43/100, batch:   134/  792, ite: 54379] train loss: 4.932334, tar: 0.514608 
l0: 0.233458, l1: 0.240785, l2: 0.240113, l3: 0.253573, l4: 0.294142, l5: 0.371636, l6: 0.553566

[epoch:  43/100, batch:   136/  792, ite: 54380] train loss: 4.926579, tar: 0.513868 
l0: 0.171476, l1: 0.175248, l2: 0.175235, l3: 0.183578, l4: 0.210577, l5: 0.279480, l6: 0.397480

[epoch:  43/100, batch:   138/  792, ite: 54381] train loss: 4.918923, tar: 0.512969 
l0: 0.340263, l1: 0.327525, l2: 0.324476, l3: 0.332260, l4: 0.370258, l5: 0.474913, l6: 0.660833

[epoch:  43/100, batch:   140/  792, ite: 54382] train loss: 4.915152, tar: 0.512517 
l0: 0.877845, l1: 0.881714, l2: 0.884908, l3: 0.899209, l4: 0.939369, l5: 1.072802, l6: 1.210459

[epoch:  43/100, batch:   142/  792, ite: 54383] train loss: 4.923372, tar: 0.51

[epoch:  43/100, batch:   222/  792, ite: 54423] train loss: 4.889128, tar: 0.509028 
l0: 0.254720, l1: 0.260672, l2: 0.258740, l3: 0.270254, l4: 0.284016, l5: 0.392999, l6: 0.531171

[epoch:  43/100, batch:   224/  792, ite: 54424] train loss: 4.884176, tar: 0.508428 
l0: 0.604165, l1: 0.611671, l2: 0.608245, l3: 0.609367, l4: 0.614768, l5: 0.736945, l6: 1.405005

[epoch:  43/100, batch:   226/  792, ite: 54425] train loss: 4.888262, tar: 0.508653 
l0: 0.946977, l1: 0.949275, l2: 0.952183, l3: 0.952475, l4: 0.997080, l5: 1.090930, l6: 1.136110

[epoch:  43/100, batch:   228/  792, ite: 54426] train loss: 4.896313, tar: 0.509682 
l0: 0.446967, l1: 0.454528, l2: 0.455559, l3: 0.467970, l4: 0.495431, l5: 0.632869, l6: 0.852693

[epoch:  43/100, batch:   230/  792, ite: 54427] train loss: 4.896115, tar: 0.509535 
l0: 0.784301, l1: 0.787013, l2: 0.781422, l3: 0.788718, l4: 0.818524, l5: 0.889771, l6: 1.080262

[epoch:  43/100, batch:   232/  792, ite: 54428] train loss: 4.901516, tar: 0.51

[epoch:  43/100, batch:   312/  792, ite: 54468] train loss: 4.876845, tar: 0.507276 
l0: 0.648853, l1: 0.650838, l2: 0.650907, l3: 0.652214, l4: 0.660007, l5: 0.700508, l6: 0.920807

[epoch:  43/100, batch:   314/  792, ite: 54469] train loss: 4.878893, tar: 0.507578 
l0: 0.692642, l1: 0.699863, l2: 0.702332, l3: 0.704584, l4: 0.709488, l5: 0.707634, l6: 0.794348

[epoch:  43/100, batch:   316/  792, ite: 54470] train loss: 4.881097, tar: 0.507972 
l0: 0.532022, l1: 0.529130, l2: 0.539210, l3: 0.545563, l4: 0.580021, l5: 0.708776, l6: 0.772187

[epoch:  43/100, batch:   318/  792, ite: 54471] train loss: 4.881320, tar: 0.508023 
l0: 0.426846, l1: 0.431054, l2: 0.439227, l3: 0.463081, l4: 0.490915, l5: 0.578253, l6: 0.690782

[epoch:  43/100, batch:   320/  792, ite: 54472] train loss: 4.879907, tar: 0.507851 
l0: 0.685803, l1: 0.693667, l2: 0.690739, l3: 0.690672, l4: 0.681373, l5: 0.739922, l6: 0.904709

[epoch:  43/100, batch:   322/  792, ite: 54473] train loss: 4.882628, tar: 0.50

[epoch:  43/100, batch:   402/  792, ite: 54513] train loss: 4.867896, tar: 0.507139 
l0: 0.276147, l1: 0.275565, l2: 0.281483, l3: 0.282059, l4: 0.301853, l5: 0.410604, l6: 0.535303

[epoch:  43/100, batch:   404/  792, ite: 54514] train loss: 4.864153, tar: 0.506689 
l0: 0.387801, l1: 0.389275, l2: 0.389778, l3: 0.406060, l4: 0.429279, l5: 0.528239, l6: 0.729211

[epoch:  43/100, batch:   406/  792, ite: 54515] train loss: 4.862420, tar: 0.506459 
l0: 0.501385, l1: 0.507132, l2: 0.504840, l3: 0.515143, l4: 0.527886, l5: 0.553834, l6: 0.777203

[epoch:  43/100, batch:   408/  792, ite: 54516] train loss: 4.862052, tar: 0.506449 
l0: 0.362154, l1: 0.361117, l2: 0.359974, l3: 0.362453, l4: 0.417100, l5: 0.505350, l6: 0.549330

[epoch:  43/100, batch:   410/  792, ite: 54517] train loss: 4.859561, tar: 0.506170 
l0: 0.664011, l1: 0.679221, l2: 0.674120, l3: 0.677915, l4: 0.683046, l5: 0.746298, l6: 0.796959

[epoch:  43/100, batch:   412/  792, ite: 54518] train loss: 4.861162, tar: 0.50

[epoch:  43/100, batch:   492/  792, ite: 54558] train loss: 4.865050, tar: 0.506883 
l0: 0.377898, l1: 0.379091, l2: 0.379213, l3: 0.393248, l4: 0.422866, l5: 0.512225, l6: 0.617754

[epoch:  43/100, batch:   494/  792, ite: 54559] train loss: 4.863062, tar: 0.506653 
l0: 0.441230, l1: 0.441325, l2: 0.445100, l3: 0.439560, l4: 0.455364, l5: 0.517635, l6: 0.804980

[epoch:  43/100, batch:   496/  792, ite: 54560] train loss: 4.862315, tar: 0.506536 
l0: 0.538728, l1: 0.541041, l2: 0.539040, l3: 0.539396, l4: 0.534213, l5: 0.619279, l6: 0.675349

[epoch:  43/100, batch:   498/  792, ite: 54561] train loss: 4.862175, tar: 0.506593 
l0: 0.422214, l1: 0.425447, l2: 0.426672, l3: 0.423767, l4: 0.457224, l5: 0.527781, l6: 0.657791

[epoch:  43/100, batch:   500/  792, ite: 54562] train loss: 4.860644, tar: 0.506443 
l0: 0.388953, l1: 0.391231, l2: 0.392758, l3: 0.402744, l4: 0.408036, l5: 0.498343, l6: 0.782187

[epoch:  43/100, batch:   502/  792, ite: 54563] train loss: 4.859499, tar: 0.50

[epoch:  43/100, batch:   582/  792, ite: 54603] train loss: 4.832759, tar: 0.502632 
l0: 0.415071, l1: 0.420894, l2: 0.421581, l3: 0.425759, l4: 0.464467, l5: 0.635272, l6: 0.813044

[epoch:  43/100, batch:   584/  792, ite: 54604] train loss: 4.832013, tar: 0.502487 
l0: 0.383261, l1: 0.386090, l2: 0.389994, l3: 0.393625, l4: 0.425975, l5: 0.557989, l6: 0.661798

[epoch:  43/100, batch:   586/  792, ite: 54605] train loss: 4.830585, tar: 0.502290 
l0: 0.834175, l1: 0.837813, l2: 0.829171, l3: 0.821149, l4: 0.907074, l5: 0.895375, l6: 0.938783

[epoch:  43/100, batch:   588/  792, ite: 54606] train loss: 4.834561, tar: 0.502838 
l0: 0.387188, l1: 0.389018, l2: 0.396016, l3: 0.397617, l4: 0.420941, l5: 0.531843, l6: 0.939861

[epoch:  43/100, batch:   590/  792, ite: 54607] train loss: 4.833703, tar: 0.502648 
l0: 0.707999, l1: 0.719409, l2: 0.713339, l3: 0.711874, l4: 0.745903, l5: 0.839153, l6: 0.907996

[epoch:  43/100, batch:   592/  792, ite: 54608] train loss: 4.836237, tar: 0.50

[epoch:  43/100, batch:   672/  792, ite: 54648] train loss: 4.862566, tar: 0.506576 
l0: 0.384807, l1: 0.388331, l2: 0.389660, l3: 0.392029, l4: 0.414447, l5: 0.502832, l6: 0.641381

[epoch:  43/100, batch:   674/  792, ite: 54649] train loss: 4.860862, tar: 0.506388 
l0: 0.958967, l1: 0.974328, l2: 0.978904, l3: 0.981437, l4: 1.064078, l5: 1.134181, l6: 1.115149

[epoch:  43/100, batch:   676/  792, ite: 54650] train loss: 4.866147, tar: 0.507084 
l0: 0.423219, l1: 0.426465, l2: 0.429370, l3: 0.436474, l4: 0.463939, l5: 0.578077, l6: 0.638282

[epoch:  43/100, batch:   678/  792, ite: 54651] train loss: 4.865331, tar: 0.506955 
l0: 0.614386, l1: 0.620935, l2: 0.617583, l3: 0.630511, l4: 0.652869, l5: 0.698905, l6: 0.879795

[epoch:  43/100, batch:   680/  792, ite: 54652] train loss: 4.866648, tar: 0.507120 
l0: 0.467622, l1: 0.474791, l2: 0.475201, l3: 0.475631, l4: 0.511083, l5: 0.651693, l6: 0.885474

[epoch:  43/100, batch:   682/  792, ite: 54653] train loss: 4.866735, tar: 0.50

[epoch:  43/100, batch:   762/  792, ite: 54693] train loss: 4.876395, tar: 0.507576 
l0: 0.417376, l1: 0.422278, l2: 0.425140, l3: 0.424878, l4: 0.459809, l5: 0.623783, l6: 0.893548

[epoch:  43/100, batch:   764/  792, ite: 54694] train loss: 4.876147, tar: 0.507446 
l0: 0.657555, l1: 0.662621, l2: 0.661983, l3: 0.664560, l4: 0.694265, l5: 0.743485, l6: 0.909086

[epoch:  43/100, batch:   766/  792, ite: 54695] train loss: 4.877519, tar: 0.507662 
l0: 0.648992, l1: 0.655200, l2: 0.655947, l3: 0.656426, l4: 0.685850, l5: 0.807595, l6: 1.027264

[epoch:  43/100, batch:   768/  792, ite: 54696] train loss: 4.879427, tar: 0.507865 
l0: 0.398041, l1: 0.406000, l2: 0.406963, l3: 0.415088, l4: 0.425550, l5: 0.505170, l6: 0.542423

[epoch:  43/100, batch:   770/  792, ite: 54697] train loss: 4.877613, tar: 0.507708 
l0: 0.278087, l1: 0.275109, l2: 0.278594, l3: 0.287049, l4: 0.329074, l5: 0.426116, l6: 0.510692

[epoch:  43/100, batch:   772/  792, ite: 54698] train loss: 4.874748, tar: 0.50

l0: 0.569182, l1: 0.569607, l2: 0.571443, l3: 0.584673, l4: 0.631049, l5: 0.733502, l6: 0.943269

[epoch:  44/100, batch:    60/  792, ite: 54738] train loss: 4.869148, tar: 0.506514 
l0: 0.394888, l1: 0.390329, l2: 0.390984, l3: 0.404494, l4: 0.434738, l5: 0.487827, l6: 0.666166

[epoch:  44/100, batch:    62/  792, ite: 54739] train loss: 4.867819, tar: 0.506363 
l0: 0.276838, l1: 0.279178, l2: 0.277020, l3: 0.273856, l4: 0.302959, l5: 0.497388, l6: 0.627266

[epoch:  44/100, batch:    64/  792, ite: 54740] train loss: 4.865573, tar: 0.506053 
l0: 0.448736, l1: 0.455296, l2: 0.456035, l3: 0.459678, l4: 0.488655, l5: 0.551250, l6: 0.802808

[epoch:  44/100, batch:    66/  792, ite: 54741] train loss: 4.865112, tar: 0.505975 
l0: 0.374998, l1: 0.374172, l2: 0.378103, l3: 0.377297, l4: 0.407773, l5: 0.481136, l6: 0.734617

[epoch:  44/100, batch:    68/  792, ite: 54742] train loss: 4.863719, tar: 0.505799 
l0: 0.794383, l1: 0.806593, l2: 0.803840, l3: 0.806688, l4: 0.827468, l5: 0.9038

[epoch:  44/100, batch:   148/  792, ite: 54782] train loss: 4.864023, tar: 0.505901 
l0: 0.382966, l1: 0.384318, l2: 0.383726, l3: 0.389499, l4: 0.422143, l5: 0.479676, l6: 0.720240

[epoch:  44/100, batch:   150/  792, ite: 54783] train loss: 4.862744, tar: 0.505744 
l0: 0.245477, l1: 0.250058, l2: 0.250253, l3: 0.259056, l4: 0.270941, l5: 0.375386, l6: 0.548771

[epoch:  44/100, batch:   152/  792, ite: 54784] train loss: 4.860061, tar: 0.505412 
l0: 0.455024, l1: 0.454179, l2: 0.454525, l3: 0.459887, l4: 0.486516, l5: 0.554825, l6: 0.806559

[epoch:  44/100, batch:   154/  792, ite: 54785] train loss: 4.859695, tar: 0.505348 
l0: 0.477545, l1: 0.481765, l2: 0.478297, l3: 0.476515, l4: 0.498409, l5: 0.536035, l6: 0.712618

[epoch:  44/100, batch:   156/  792, ite: 54786] train loss: 4.859080, tar: 0.505313 
l0: 0.324209, l1: 0.325574, l2: 0.323878, l3: 0.336384, l4: 0.358163, l5: 0.484435, l6: 0.614743

[epoch:  44/100, batch:   158/  792, ite: 54787] train loss: 4.857317, tar: 0.50

[epoch:  44/100, batch:   238/  792, ite: 54827] train loss: 4.875267, tar: 0.508330 
l0: 0.412932, l1: 0.414320, l2: 0.413976, l3: 0.418082, l4: 0.437396, l5: 0.587356, l6: 0.673601

[epoch:  44/100, batch:   240/  792, ite: 54828] train loss: 4.874283, tar: 0.508215 
l0: 0.347774, l1: 0.352157, l2: 0.353488, l3: 0.369655, l4: 0.399119, l5: 0.543527, l6: 0.843904

[epoch:  44/100, batch:   242/  792, ite: 54829] train loss: 4.873323, tar: 0.508022 
l0: 0.438548, l1: 0.445255, l2: 0.442289, l3: 0.450824, l4: 0.498037, l5: 0.563327, l6: 0.719742

[epoch:  44/100, batch:   244/  792, ite: 54830] train loss: 4.872783, tar: 0.507938 
l0: 0.465916, l1: 0.462196, l2: 0.461071, l3: 0.470713, l4: 0.474881, l5: 0.619496, l6: 0.670158

[epoch:  44/100, batch:   246/  792, ite: 54831] train loss: 4.872246, tar: 0.507887 
l0: 0.174706, l1: 0.181518, l2: 0.178651, l3: 0.181995, l4: 0.213958, l5: 0.339719, l6: 0.773517

[epoch:  44/100, batch:   248/  792, ite: 54832] train loss: 4.869447, tar: 0.50

[epoch:  44/100, batch:   328/  792, ite: 54872] train loss: 4.855542, tar: 0.505613 
l0: 0.802814, l1: 0.813330, l2: 0.815400, l3: 0.823965, l4: 0.822990, l5: 0.874367, l6: 1.342070

[epoch:  44/100, batch:   330/  792, ite: 54873] train loss: 4.858762, tar: 0.505953 
l0: 0.402069, l1: 0.400251, l2: 0.401247, l3: 0.400440, l4: 0.433120, l5: 0.603502, l6: 0.833192

[epoch:  44/100, batch:   332/  792, ite: 54874] train loss: 4.858236, tar: 0.505834 
l0: 0.670501, l1: 0.672341, l2: 0.671011, l3: 0.675329, l4: 0.695674, l5: 0.766895, l6: 0.867593

[epoch:  44/100, batch:   334/  792, ite: 54875] train loss: 4.859442, tar: 0.506022 
l0: 0.503049, l1: 0.509188, l2: 0.508802, l3: 0.514747, l4: 0.508363, l5: 0.566794, l6: 0.631263

[epoch:  44/100, batch:   336/  792, ite: 54876] train loss: 4.858934, tar: 0.506019 
l0: 0.656562, l1: 0.655108, l2: 0.657864, l3: 0.660102, l4: 0.716875, l5: 0.843474, l6: 1.273833

[epoch:  44/100, batch:   338/  792, ite: 54877] train loss: 4.861226, tar: 0.50

[epoch:  44/100, batch:   418/  792, ite: 54917] train loss: 4.860083, tar: 0.506019 
l0: 0.393747, l1: 0.393144, l2: 0.395709, l3: 0.409546, l4: 0.452643, l5: 0.576378, l6: 0.761200

[epoch:  44/100, batch:   420/  792, ite: 54918] train loss: 4.859283, tar: 0.505897 
l0: 0.816148, l1: 0.810813, l2: 0.807937, l3: 0.802267, l4: 0.840256, l5: 0.978995, l6: 1.092565

[epoch:  44/100, batch:   422/  792, ite: 54919] train loss: 4.861891, tar: 0.506234 
l0: 0.707110, l1: 0.693367, l2: 0.692934, l3: 0.700486, l4: 0.727836, l5: 0.800979, l6: 0.899399

[epoch:  44/100, batch:   424/  792, ite: 54920] train loss: 4.863318, tar: 0.506452 
l0: 0.863253, l1: 0.859293, l2: 0.857578, l3: 0.870301, l4: 0.904776, l5: 1.045023, l6: 1.469165

[epoch:  44/100, batch:   426/  792, ite: 54921] train loss: 4.867036, tar: 0.506840 
l0: 0.829099, l1: 0.820867, l2: 0.824651, l3: 0.819523, l4: 0.858613, l5: 0.969093, l6: 1.123194

[epoch:  44/100, batch:   428/  792, ite: 54922] train loss: 4.869842, tar: 0.50

[epoch:  44/100, batch:   508/  792, ite: 54962] train loss: 4.868927, tar: 0.506705 
l0: 0.452225, l1: 0.452919, l2: 0.452934, l3: 0.461385, l4: 0.493333, l5: 0.674290, l6: 0.748667

[epoch:  44/100, batch:   510/  792, ite: 54963] train loss: 4.868616, tar: 0.506648 
l0: 0.559425, l1: 0.572632, l2: 0.567072, l3: 0.558169, l4: 0.549227, l5: 0.605520, l6: 0.738729

[epoch:  44/100, batch:   512/  792, ite: 54964] train loss: 4.868726, tar: 0.506703 
l0: 0.664280, l1: 0.672418, l2: 0.674215, l3: 0.668504, l4: 0.698676, l5: 0.791022, l6: 0.946144

[epoch:  44/100, batch:   514/  792, ite: 54965] train loss: 4.870244, tar: 0.506866 
l0: 0.638746, l1: 0.643980, l2: 0.641232, l3: 0.651129, l4: 0.713777, l5: 0.842232, l6: 1.085347

[epoch:  44/100, batch:   516/  792, ite: 54966] train loss: 4.871811, tar: 0.507003 
l0: 0.622099, l1: 0.626781, l2: 0.630433, l3: 0.628876, l4: 0.628551, l5: 0.685297, l6: 0.920842

[epoch:  44/100, batch:   518/  792, ite: 54967] train loss: 4.872800, tar: 0.50

[epoch:  44/100, batch:   598/  792, ite: 55007] train loss: 4.853106, tar: 0.504707 
l0: 1.001215, l1: 1.023549, l2: 1.033050, l3: 1.052371, l4: 1.040362, l5: 0.962569, l6: 1.013965

[epoch:  44/100, batch:   600/  792, ite: 55008] train loss: 4.856328, tar: 0.505199 
l0: 0.262665, l1: 0.265780, l2: 0.265358, l3: 0.269885, l4: 0.306506, l5: 0.372575, l6: 0.541849

[epoch:  44/100, batch:   602/  792, ite: 55009] train loss: 4.854298, tar: 0.504959 
l0: 0.477009, l1: 0.480415, l2: 0.478926, l3: 0.480453, l4: 0.498727, l5: 0.625587, l6: 0.770638

[epoch:  44/100, batch:   604/  792, ite: 55010] train loss: 4.854289, tar: 0.504931 
l0: 0.382327, l1: 0.381780, l2: 0.381717, l3: 0.387055, l4: 0.392086, l5: 0.451595, l6: 0.520203

[epoch:  44/100, batch:   606/  792, ite: 55011] train loss: 4.852874, tar: 0.504810 
l0: 0.649758, l1: 0.665091, l2: 0.663125, l3: 0.667269, l4: 0.682508, l5: 0.719419, l6: 1.032358

[epoch:  44/100, batch:   608/  792, ite: 55012] train loss: 4.854105, tar: 0.50

[epoch:  44/100, batch:   688/  792, ite: 55052] train loss: 4.861497, tar: 0.505981 
l0: 0.363152, l1: 0.367003, l2: 0.368814, l3: 0.379507, l4: 0.431204, l5: 0.545725, l6: 0.770378

[epoch:  44/100, batch:   690/  792, ite: 55053] train loss: 4.860880, tar: 0.505845 
l0: 1.101874, l1: 1.093196, l2: 1.105131, l3: 1.126811, l4: 1.183450, l5: 1.230692, l6: 1.439072

[epoch:  44/100, batch:   692/  792, ite: 55054] train loss: 4.865712, tar: 0.506411 
l0: 1.141859, l1: 1.129986, l2: 1.130739, l3: 1.127344, l4: 1.151575, l5: 1.234167, l6: 1.449004

[epoch:  44/100, batch:   694/  792, ite: 55055] train loss: 4.870379, tar: 0.507013 
l0: 0.881383, l1: 0.886038, l2: 0.886475, l3: 0.898106, l4: 0.917163, l5: 0.909029, l6: 1.017091

[epoch:  44/100, batch:   696/  792, ite: 55056] train loss: 4.872885, tar: 0.507368 
l0: 1.030878, l1: 1.017640, l2: 1.024087, l3: 1.020706, l4: 0.998467, l5: 1.099094, l6: 1.310305

[epoch:  44/100, batch:   698/  792, ite: 55057] train loss: 4.876571, tar: 0.50

[epoch:  44/100, batch:   778/  792, ite: 55097] train loss: 4.862552, tar: 0.506451 
l0: 0.799242, l1: 0.813237, l2: 0.810439, l3: 0.811253, l4: 0.818722, l5: 0.835803, l6: 0.964601

[epoch:  44/100, batch:   780/  792, ite: 55098] train loss: 4.864503, tar: 0.506718 
l0: 0.234932, l1: 0.235859, l2: 0.234369, l3: 0.236128, l4: 0.249350, l5: 0.339807, l6: 0.439200

[epoch:  44/100, batch:   782/  792, ite: 55099] train loss: 4.862263, tar: 0.506471 
l0: 0.461384, l1: 0.468057, l2: 0.468514, l3: 0.472782, l4: 0.489961, l5: 0.567376, l6: 0.769463

[epoch:  44/100, batch:   784/  792, ite: 55100] train loss: 4.861995, tar: 0.506430 
l0: 0.823692, l1: 0.824132, l2: 0.821265, l3: 0.819030, l4: 0.824695, l5: 0.897666, l6: 1.139450

[epoch:  44/100, batch:   786/  792, ite: 55101] train loss: 4.864246, tar: 0.506718 
l0: 0.956580, l1: 0.961343, l2: 0.955723, l3: 0.968992, l4: 0.980019, l5: 1.067782, l6: 1.103553

[epoch:  44/100, batch:   788/  792, ite: 55102] train loss: 4.867187, tar: 0.50

l0: 0.762714, l1: 0.761754, l2: 0.761509, l3: 0.766716, l4: 0.787326, l5: 0.830163, l6: 0.973581

[epoch:  45/100, batch:    76/  792, ite: 55142] train loss: 4.858637, tar: 0.505983 
l0: 0.882997, l1: 0.897584, l2: 0.889030, l3: 0.893379, l4: 0.926399, l5: 1.034517, l6: 1.185496

[epoch:  45/100, batch:    78/  792, ite: 55143] train loss: 4.861324, tar: 0.506313 
l0: 0.384133, l1: 0.384800, l2: 0.383323, l3: 0.384921, l4: 0.428166, l5: 0.556332, l6: 0.871938

[epoch:  45/100, batch:    80/  792, ite: 55144] train loss: 4.860964, tar: 0.506207 
l0: 0.432538, l1: 0.433688, l2: 0.434017, l3: 0.439713, l4: 0.466861, l5: 0.541124, l6: 0.715894

[epoch:  45/100, batch:    82/  792, ite: 55145] train loss: 4.860535, tar: 0.506142 
l0: 0.449052, l1: 0.456570, l2: 0.460732, l3: 0.461622, l4: 0.491949, l5: 0.762682, l6: 0.958619

[epoch:  45/100, batch:    84/  792, ite: 55146] train loss: 4.860767, tar: 0.506092 
l0: 0.445327, l1: 0.444317, l2: 0.441214, l3: 0.448318, l4: 0.468713, l5: 0.5576

[epoch:  45/100, batch:   164/  792, ite: 55186] train loss: 4.855887, tar: 0.505662 
l0: 0.385019, l1: 0.390452, l2: 0.391397, l3: 0.392306, l4: 0.422354, l5: 0.541492, l6: 0.692461

[epoch:  45/100, batch:   166/  792, ite: 55187] train loss: 4.855163, tar: 0.505561 
l0: 0.364123, l1: 0.363235, l2: 0.365603, l3: 0.380249, l4: 0.421006, l5: 0.532480, l6: 0.576620

[epoch:  45/100, batch:   168/  792, ite: 55188] train loss: 4.854134, tar: 0.505442 
l0: 0.627518, l1: 0.634330, l2: 0.638637, l3: 0.641880, l4: 0.679439, l5: 0.701924, l6: 1.069537

[epoch:  45/100, batch:   170/  792, ite: 55189] train loss: 4.855279, tar: 0.505544 
l0: 0.329502, l1: 0.327845, l2: 0.331040, l3: 0.349954, l4: 0.384095, l5: 0.502872, l6: 0.659833

[epoch:  45/100, batch:   172/  792, ite: 55190] train loss: 4.854214, tar: 0.505396 
l0: 0.650631, l1: 0.660739, l2: 0.658502, l3: 0.659631, l4: 0.691603, l5: 0.884472, l6: 0.964862

[epoch:  45/100, batch:   174/  792, ite: 55191] train loss: 4.855348, tar: 0.50

[epoch:  45/100, batch:   254/  792, ite: 55231] train loss: 4.863117, tar: 0.506421 
l0: 0.884390, l1: 0.884063, l2: 0.876649, l3: 0.880560, l4: 0.892573, l5: 0.926115, l6: 0.920503

[epoch:  45/100, batch:   256/  792, ite: 55232] train loss: 4.865030, tar: 0.506728 
l0: 0.511575, l1: 0.518127, l2: 0.518284, l3: 0.520997, l4: 0.562321, l5: 0.549198, l6: 0.617175

[epoch:  45/100, batch:   258/  792, ite: 55233] train loss: 4.864762, tar: 0.506732 
l0: 0.629816, l1: 0.626462, l2: 0.626984, l3: 0.624881, l4: 0.609135, l5: 0.731779, l6: 0.969157

[epoch:  45/100, batch:   260/  792, ite: 55234] train loss: 4.865546, tar: 0.506832 
l0: 0.245060, l1: 0.246860, l2: 0.243774, l3: 0.252282, l4: 0.280878, l5: 0.393401, l6: 0.548413

[epoch:  45/100, batch:   262/  792, ite: 55235] train loss: 4.863924, tar: 0.506620 
l0: 0.511111, l1: 0.518274, l2: 0.517283, l3: 0.523176, l4: 0.552221, l5: 0.606642, l6: 0.785710

[epoch:  45/100, batch:   264/  792, ite: 55236] train loss: 4.863990, tar: 0.50

[epoch:  45/100, batch:   344/  792, ite: 55276] train loss: 4.867704, tar: 0.507208 
l0: 0.304132, l1: 0.314906, l2: 0.307811, l3: 0.315870, l4: 0.381933, l5: 0.465247, l6: 0.483397

[epoch:  45/100, batch:   346/  792, ite: 55277] train loss: 4.866377, tar: 0.507049 
l0: 0.397965, l1: 0.404471, l2: 0.402293, l3: 0.413351, l4: 0.423975, l5: 0.520792, l6: 1.230157

[epoch:  45/100, batch:   348/  792, ite: 55278] train loss: 4.866522, tar: 0.506964 
l0: 0.794351, l1: 0.808496, l2: 0.808927, l3: 0.809701, l4: 0.804778, l5: 0.998371, l6: 1.279986

[epoch:  45/100, batch:   350/  792, ite: 55279] train loss: 4.868696, tar: 0.507189 
l0: 0.314253, l1: 0.317822, l2: 0.318620, l3: 0.322430, l4: 0.349303, l5: 0.508535, l6: 0.684574

[epoch:  45/100, batch:   352/  792, ite: 55280] train loss: 4.867645, tar: 0.507038 
l0: 0.495477, l1: 0.501180, l2: 0.503231, l3: 0.511594, l4: 0.529788, l5: 0.667048, l6: 1.033307

[epoch:  45/100, batch:   354/  792, ite: 55281] train loss: 4.868001, tar: 0.50

[epoch:  45/100, batch:   434/  792, ite: 55321] train loss: 4.873641, tar: 0.507964 
l0: 0.466535, l1: 0.468934, l2: 0.468660, l3: 0.480179, l4: 0.536863, l5: 0.696486, l6: 0.928036

[epoch:  45/100, batch:   436/  792, ite: 55322] train loss: 4.873761, tar: 0.507933 
l0: 0.317060, l1: 0.319776, l2: 0.314241, l3: 0.310555, l4: 0.332142, l5: 0.490592, l6: 0.640718

[epoch:  45/100, batch:   438/  792, ite: 55323] train loss: 4.872643, tar: 0.507788 
l0: 0.547454, l1: 0.544170, l2: 0.544928, l3: 0.548518, l4: 0.588740, l5: 0.731972, l6: 0.953207

[epoch:  45/100, batch:   440/  792, ite: 55324] train loss: 4.873094, tar: 0.507818 
l0: 0.659519, l1: 0.665981, l2: 0.662542, l3: 0.656741, l4: 0.681854, l5: 0.805205, l6: 0.929939

[epoch:  45/100, batch:   442/  792, ite: 55325] train loss: 4.874036, tar: 0.507933 
l0: 0.581568, l1: 0.581066, l2: 0.582167, l3: 0.584110, l4: 0.601457, l5: 0.728363, l6: 0.859905

[epoch:  45/100, batch:   444/  792, ite: 55326] train loss: 4.874533, tar: 0.50

[epoch:  45/100, batch:   524/  792, ite: 55366] train loss: 4.867340, tar: 0.507123 
l0: 0.324100, l1: 0.322876, l2: 0.322882, l3: 0.320621, l4: 0.344519, l5: 0.400672, l6: 0.592055

[epoch:  45/100, batch:   526/  792, ite: 55367] train loss: 4.866227, tar: 0.506989 
l0: 0.910844, l1: 0.950086, l2: 0.905149, l3: 0.888326, l4: 0.909268, l5: 0.881216, l6: 0.935923

[epoch:  45/100, batch:   528/  792, ite: 55368] train loss: 4.868018, tar: 0.507284 
l0: 0.866201, l1: 0.869752, l2: 0.876332, l3: 0.891351, l4: 0.918770, l5: 1.001830, l6: 1.210361

[epoch:  45/100, batch:   530/  792, ite: 55369] train loss: 4.870247, tar: 0.507547 
l0: 0.429986, l1: 0.436864, l2: 0.438972, l3: 0.446166, l4: 0.474700, l5: 0.484150, l6: 0.586753

[epoch:  45/100, batch:   532/  792, ite: 55370] train loss: 4.869585, tar: 0.507490 
l0: 0.364537, l1: 0.369734, l2: 0.365570, l3: 0.362584, l4: 0.385412, l5: 0.532234, l6: 0.655617

[epoch:  45/100, batch:   534/  792, ite: 55371] train loss: 4.868745, tar: 0.50

[epoch:  45/100, batch:   614/  792, ite: 55411] train loss: 4.866499, tar: 0.507300 
l0: 0.480425, l1: 0.478258, l2: 0.487249, l3: 0.496660, l4: 0.503197, l5: 0.690099, l6: 0.701928

[epoch:  45/100, batch:   616/  792, ite: 55412] train loss: 4.866359, tar: 0.507281 
l0: 0.262397, l1: 0.264661, l2: 0.267620, l3: 0.277701, l4: 0.321902, l5: 0.432948, l6: 0.563813

[epoch:  45/100, batch:   618/  792, ite: 55413] train loss: 4.865004, tar: 0.507107 
l0: 0.373549, l1: 0.375911, l2: 0.377738, l3: 0.391096, l4: 0.425610, l5: 0.510392, l6: 0.671391

[epoch:  45/100, batch:   620/  792, ite: 55414] train loss: 4.864350, tar: 0.507013 
l0: 0.539437, l1: 0.537645, l2: 0.532675, l3: 0.534917, l4: 0.549426, l5: 0.580947, l6: 0.808618

[epoch:  45/100, batch:   622/  792, ite: 55415] train loss: 4.864353, tar: 0.507036 
l0: 0.437932, l1: 0.439097, l2: 0.435973, l3: 0.430852, l4: 0.465202, l5: 0.570017, l6: 0.662236

[epoch:  45/100, batch:   624/  792, ite: 55416] train loss: 4.863822, tar: 0.50

[epoch:  45/100, batch:   704/  792, ite: 55456] train loss: 4.854258, tar: 0.505572 
l0: 0.824702, l1: 0.819553, l2: 0.820160, l3: 0.820771, l4: 0.827065, l5: 0.829751, l6: 1.002068

[epoch:  45/100, batch:   706/  792, ite: 55457] train loss: 4.855714, tar: 0.505791 
l0: 0.432882, l1: 0.442514, l2: 0.442272, l3: 0.442805, l4: 0.458611, l5: 0.518090, l6: 0.618600

[epoch:  45/100, batch:   708/  792, ite: 55458] train loss: 4.855182, tar: 0.505741 
l0: 0.485081, l1: 0.479681, l2: 0.483105, l3: 0.500866, l4: 0.516932, l5: 0.620898, l6: 0.728006

[epoch:  45/100, batch:   710/  792, ite: 55459] train loss: 4.855051, tar: 0.505727 
l0: 0.283372, l1: 0.291637, l2: 0.289221, l3: 0.290630, l4: 0.324005, l5: 0.486230, l6: 0.643329

[epoch:  45/100, batch:   712/  792, ite: 55460] train loss: 4.853947, tar: 0.505574 
l0: 0.602256, l1: 0.601912, l2: 0.603874, l3: 0.605385, l4: 0.605868, l5: 0.690408, l6: 0.772244

[epoch:  45/100, batch:   714/  792, ite: 55461] train loss: 4.854346, tar: 0.50

l0: 0.575288, l1: 0.577848, l2: 0.577442, l3: 0.589607, l4: 0.600732, l5: 0.718892, l6: 1.060203

[epoch:  46/100, batch:     2/  792, ite: 55501] train loss: 4.846774, tar: 0.504852 
l0: 0.675786, l1: 0.679565, l2: 0.675941, l3: 0.677469, l4: 0.719589, l5: 0.842401, l6: 0.963595

[epoch:  46/100, batch:     4/  792, ite: 55502] train loss: 4.847714, tar: 0.504966 
l0: 0.501551, l1: 0.500914, l2: 0.498200, l3: 0.506482, l4: 0.546753, l5: 0.653577, l6: 0.849864

[epoch:  46/100, batch:     6/  792, ite: 55503] train loss: 4.847840, tar: 0.504964 
l0: 0.933993, l1: 0.942276, l2: 0.941734, l3: 0.938655, l4: 0.963549, l5: 1.050631, l6: 1.185504

[epoch:  46/100, batch:     8/  792, ite: 55504] train loss: 4.850096, tar: 0.505249 
l0: 0.457639, l1: 0.463820, l2: 0.468507, l3: 0.479042, l4: 0.503949, l5: 0.613523, l6: 0.757096

[epoch:  46/100, batch:    10/  792, ite: 55505] train loss: 4.849893, tar: 0.505218 
l0: 0.454974, l1: 0.457861, l2: 0.457804, l3: 0.457938, l4: 0.496455, l5: 0.5679

[epoch:  46/100, batch:    90/  792, ite: 55545] train loss: 4.866523, tar: 0.507383 
l0: 0.443374, l1: 0.452131, l2: 0.452483, l3: 0.457344, l4: 0.499589, l5: 0.645768, l6: 0.806473

[epoch:  46/100, batch:    92/  792, ite: 55546] train loss: 4.866371, tar: 0.507342 
l0: 0.677024, l1: 0.678946, l2: 0.682415, l3: 0.683707, l4: 0.678535, l5: 0.705081, l6: 0.868069

[epoch:  46/100, batch:    94/  792, ite: 55547] train loss: 4.867089, tar: 0.507451 
l0: 0.322707, l1: 0.323003, l2: 0.326033, l3: 0.332720, l4: 0.348377, l5: 0.365259, l6: 0.468060

[epoch:  46/100, batch:    96/  792, ite: 55548] train loss: 4.865879, tar: 0.507332 
l0: 0.361573, l1: 0.355417, l2: 0.365618, l3: 0.386358, l4: 0.414740, l5: 0.530149, l6: 0.748932

[epoch:  46/100, batch:    98/  792, ite: 55549] train loss: 4.865276, tar: 0.507238 
l0: 0.276742, l1: 0.278378, l2: 0.282620, l3: 0.291103, l4: 0.334275, l5: 0.462654, l6: 0.665729

[epoch:  46/100, batch:   100/  792, ite: 55550] train loss: 4.864258, tar: 0.50

[epoch:  46/100, batch:   180/  792, ite: 55590] train loss: 4.866760, tar: 0.507487 
l0: 0.544758, l1: 0.544796, l2: 0.544076, l3: 0.544234, l4: 0.556084, l5: 0.622885, l6: 0.798040

[epoch:  46/100, batch:   182/  792, ite: 55591] train loss: 4.866857, tar: 0.507510 
l0: 0.746837, l1: 0.748296, l2: 0.749997, l3: 0.760192, l4: 0.796388, l5: 0.851380, l6: 0.984616

[epoch:  46/100, batch:   184/  792, ite: 55592] train loss: 4.867961, tar: 0.507660 
l0: 0.521741, l1: 0.524253, l2: 0.522824, l3: 0.522160, l4: 0.561426, l5: 0.625038, l6: 0.668447

[epoch:  46/100, batch:   186/  792, ite: 55593] train loss: 4.867844, tar: 0.507669 
l0: 0.334311, l1: 0.337522, l2: 0.337517, l3: 0.337012, l4: 0.350796, l5: 0.447372, l6: 0.592919

[epoch:  46/100, batch:   188/  792, ite: 55594] train loss: 4.866925, tar: 0.507560 
l0: 0.539610, l1: 0.541954, l2: 0.539965, l3: 0.545691, l4: 0.572263, l5: 0.623707, l6: 0.794222

[epoch:  46/100, batch:   190/  792, ite: 55595] train loss: 4.867048, tar: 0.50

[epoch:  46/100, batch:   270/  792, ite: 55635] train loss: 4.850034, tar: 0.505383 
l0: 0.435364, l1: 0.437967, l2: 0.443312, l3: 0.455474, l4: 0.493611, l5: 0.558350, l6: 0.556952

[epoch:  46/100, batch:   272/  792, ite: 55636] train loss: 4.849492, tar: 0.505340 
l0: 0.637155, l1: 0.636753, l2: 0.636144, l3: 0.642878, l4: 0.678194, l5: 0.717936, l6: 0.893689

[epoch:  46/100, batch:   274/  792, ite: 55637] train loss: 4.850120, tar: 0.505421 
l0: 0.410320, l1: 0.416260, l2: 0.415553, l3: 0.419285, l4: 0.464519, l5: 0.564035, l6: 0.677493

[epoch:  46/100, batch:   276/  792, ite: 55638] train loss: 4.849618, tar: 0.505363 
l0: 0.320981, l1: 0.326765, l2: 0.326939, l3: 0.336528, l4: 0.346510, l5: 0.435583, l6: 0.610394

[epoch:  46/100, batch:   278/  792, ite: 55639] train loss: 4.848761, tar: 0.505250 
l0: 0.870061, l1: 0.857088, l2: 0.864965, l3: 0.884587, l4: 0.874943, l5: 0.967266, l6: 1.118270

[epoch:  46/100, batch:   280/  792, ite: 55640] train loss: 4.850424, tar: 0.50

[epoch:  46/100, batch:   360/  792, ite: 55680] train loss: 4.838853, tar: 0.504091 
l0: 0.564826, l1: 0.566938, l2: 0.567919, l3: 0.577374, l4: 0.620088, l5: 0.702636, l6: 0.930806

[epoch:  46/100, batch:   362/  792, ite: 55681] train loss: 4.839278, tar: 0.504127 
l0: 0.537966, l1: 0.541342, l2: 0.541070, l3: 0.533531, l4: 0.562407, l5: 0.581629, l6: 0.699526

[epoch:  46/100, batch:   364/  792, ite: 55682] train loss: 4.839199, tar: 0.504147 
l0: 0.302111, l1: 0.302643, l2: 0.302235, l3: 0.319265, l4: 0.358727, l5: 0.435751, l6: 0.509457

[epoch:  46/100, batch:   366/  792, ite: 55683] train loss: 4.838134, tar: 0.504027 
l0: 0.714977, l1: 0.711651, l2: 0.712791, l3: 0.711829, l4: 0.739133, l5: 0.803693, l6: 0.928041

[epoch:  46/100, batch:   368/  792, ite: 55684] train loss: 4.839012, tar: 0.504152 
l0: 0.540655, l1: 0.544299, l2: 0.547141, l3: 0.550522, l4: 0.554676, l5: 0.714451, l6: 0.915866

[epoch:  46/100, batch:   370/  792, ite: 55685] train loss: 4.839247, tar: 0.50

[epoch:  46/100, batch:   450/  792, ite: 55725] train loss: 4.843344, tar: 0.504579 
l0: 0.335766, l1: 0.343866, l2: 0.339918, l3: 0.342904, l4: 0.384755, l5: 0.508864, l6: 0.589013

[epoch:  46/100, batch:   452/  792, ite: 55726] train loss: 4.842546, tar: 0.504481 
l0: 0.399188, l1: 0.399422, l2: 0.398967, l3: 0.405945, l4: 0.420516, l5: 0.466238, l6: 0.621294

[epoch:  46/100, batch:   454/  792, ite: 55727] train loss: 4.841967, tar: 0.504420 
l0: 0.903612, l1: 0.901064, l2: 0.904619, l3: 0.894923, l4: 0.906778, l5: 0.909069, l6: 1.048399

[epoch:  46/100, batch:   456/  792, ite: 55728] train loss: 4.843526, tar: 0.504651 
l0: 0.583021, l1: 0.584169, l2: 0.582586, l3: 0.588268, l4: 0.597780, l5: 0.596089, l6: 0.801437

[epoch:  46/100, batch:   458/  792, ite: 55729] train loss: 4.843720, tar: 0.504697 
l0: 0.396304, l1: 0.396654, l2: 0.399884, l3: 0.415633, l4: 0.422265, l5: 0.492608, l6: 0.582613

[epoch:  46/100, batch:   460/  792, ite: 55730] train loss: 4.843037, tar: 0.50

[epoch:  46/100, batch:   540/  792, ite: 55770] train loss: 4.850932, tar: 0.505681 
l0: 0.552592, l1: 0.553363, l2: 0.554674, l3: 0.561591, l4: 0.595426, l5: 0.687995, l6: 0.811762

[epoch:  46/100, batch:   542/  792, ite: 55771] train loss: 4.851187, tar: 0.505707 
l0: 0.456905, l1: 0.464197, l2: 0.458731, l3: 0.455078, l4: 0.468637, l5: 0.596328, l6: 0.773982

[epoch:  46/100, batch:   544/  792, ite: 55772] train loss: 4.850975, tar: 0.505680 
l0: 0.745160, l1: 0.757712, l2: 0.747488, l3: 0.746330, l4: 0.759297, l5: 0.768695, l6: 0.910048

[epoch:  46/100, batch:   546/  792, ite: 55773] train loss: 4.851910, tar: 0.505815 
l0: 0.332588, l1: 0.339499, l2: 0.337833, l3: 0.340741, l4: 0.386244, l5: 0.515839, l6: 0.656022

[epoch:  46/100, batch:   548/  792, ite: 55774] train loss: 4.851208, tar: 0.505717 
l0: 0.873969, l1: 0.871915, l2: 0.865973, l3: 0.883856, l4: 0.931600, l5: 1.045219, l6: 1.056400

[epoch:  46/100, batch:   550/  792, ite: 55775] train loss: 4.852784, tar: 0.50

[epoch:  46/100, batch:   630/  792, ite: 55815] train loss: 4.848979, tar: 0.505235 
l0: 0.174862, l1: 0.180089, l2: 0.180280, l3: 0.187818, l4: 0.240572, l5: 0.337152, l6: 0.418903

[epoch:  46/100, batch:   632/  792, ite: 55816] train loss: 4.847559, tar: 0.505053 
l0: 0.461047, l1: 0.464084, l2: 0.463161, l3: 0.465212, l4: 0.478684, l5: 0.492982, l6: 0.757323

[epoch:  46/100, batch:   634/  792, ite: 55817] train loss: 4.847271, tar: 0.505029 
l0: 0.306713, l1: 0.309961, l2: 0.310644, l3: 0.319378, l4: 0.332834, l5: 0.393431, l6: 0.542672

[epoch:  46/100, batch:   636/  792, ite: 55818] train loss: 4.846339, tar: 0.504920 
l0: 0.307720, l1: 0.315705, l2: 0.313078, l3: 0.321553, l4: 0.363257, l5: 0.447765, l6: 0.607850

[epoch:  46/100, batch:   638/  792, ite: 55819] train loss: 4.845522, tar: 0.504811 
l0: 0.532187, l1: 0.530816, l2: 0.526494, l3: 0.522901, l4: 0.557221, l5: 0.699993, l6: 0.892030

[epoch:  46/100, batch:   640/  792, ite: 55820] train loss: 4.845757, tar: 0.50

[epoch:  46/100, batch:   720/  792, ite: 55860] train loss: 4.845519, tar: 0.504522 
l0: 0.384656, l1: 0.390761, l2: 0.391464, l3: 0.383818, l4: 0.393306, l5: 0.511438, l6: 0.671110

[epoch:  46/100, batch:   722/  792, ite: 55861] train loss: 4.844965, tar: 0.504457 
l0: 0.455749, l1: 0.458974, l2: 0.462433, l3: 0.462949, l4: 0.488220, l5: 0.585789, l6: 0.752735

[epoch:  46/100, batch:   724/  792, ite: 55862] train loss: 4.844747, tar: 0.504431 
l0: 0.243171, l1: 0.243499, l2: 0.240615, l3: 0.246379, l4: 0.258729, l5: 0.361628, l6: 0.543446

[epoch:  46/100, batch:   726/  792, ite: 55863] train loss: 4.843591, tar: 0.504291 
l0: 1.049012, l1: 1.037539, l2: 1.031471, l3: 1.026351, l4: 1.077359, l5: 1.088109, l6: 1.047686

[epoch:  46/100, batch:   728/  792, ite: 55864] train loss: 4.845489, tar: 0.504583 
l0: 0.259793, l1: 0.266500, l2: 0.264365, l3: 0.271624, l4: 0.292036, l5: 0.403133, l6: 0.427633

[epoch:  46/100, batch:   730/  792, ite: 55865] train loss: 4.844321, tar: 0.50

l0: 0.250708, l1: 0.254077, l2: 0.255450, l3: 0.256570, l4: 0.282198, l5: 0.431780, l6: 0.648485

[epoch:  47/100, batch:    18/  792, ite: 55905] train loss: 4.847609, tar: 0.505162 
l0: 0.236454, l1: 0.240366, l2: 0.238879, l3: 0.236130, l4: 0.247842, l5: 0.375714, l6: 0.398488

[epoch:  47/100, batch:    20/  792, ite: 55906] train loss: 4.846334, tar: 0.505021 
l0: 0.412533, l1: 0.421816, l2: 0.421442, l3: 0.427443, l4: 0.452883, l5: 0.613961, l6: 0.747145

[epoch:  47/100, batch:    22/  792, ite: 55907] train loss: 4.846067, tar: 0.504972 
l0: 0.289872, l1: 0.290473, l2: 0.288125, l3: 0.287438, l4: 0.324273, l5: 0.416916, l6: 0.506060

[epoch:  47/100, batch:    24/  792, ite: 55908] train loss: 4.845051, tar: 0.504860 
l0: 0.237267, l1: 0.239190, l2: 0.240794, l3: 0.242072, l4: 0.276844, l5: 0.384802, l6: 0.501713

[epoch:  47/100, batch:    26/  792, ite: 55909] train loss: 4.843891, tar: 0.504719 
l0: 0.541956, l1: 0.542351, l2: 0.540156, l3: 0.535181, l4: 0.545468, l5: 0.5516

[epoch:  47/100, batch:   106/  792, ite: 55949] train loss: 4.848379, tar: 0.505252 
l0: 0.297470, l1: 0.301030, l2: 0.300421, l3: 0.305940, l4: 0.335702, l5: 0.414272, l6: 0.650424

[epoch:  47/100, batch:   108/  792, ite: 55950] train loss: 4.847602, tar: 0.505145 
l0: 0.495530, l1: 0.498345, l2: 0.502357, l3: 0.512133, l4: 0.511122, l5: 0.602519, l6: 0.609840

[epoch:  47/100, batch:   110/  792, ite: 55951] train loss: 4.847427, tar: 0.505140 
l0: 0.277258, l1: 0.284604, l2: 0.284487, l3: 0.285492, l4: 0.320315, l5: 0.430761, l6: 0.510457

[epoch:  47/100, batch:   112/  792, ite: 55952] train loss: 4.846438, tar: 0.505023 
l0: 0.453670, l1: 0.462191, l2: 0.465617, l3: 0.470240, l4: 0.494898, l5: 0.667432, l6: 0.833946

[epoch:  47/100, batch:   114/  792, ite: 55953] train loss: 4.846419, tar: 0.504997 
l0: 0.762218, l1: 0.759635, l2: 0.765287, l3: 0.759004, l4: 0.755695, l5: 0.846851, l6: 1.004761

[epoch:  47/100, batch:   116/  792, ite: 55954] train loss: 4.847383, tar: 0.50

[epoch:  47/100, batch:   196/  792, ite: 55994] train loss: 4.851544, tar: 0.505617 
l0: 0.333538, l1: 0.334866, l2: 0.334755, l3: 0.334038, l4: 0.350855, l5: 0.537097, l6: 0.685214

[epoch:  47/100, batch:   198/  792, ite: 55995] train loss: 4.850977, tar: 0.505531 
l0: 0.253835, l1: 0.254270, l2: 0.258026, l3: 0.270986, l4: 0.275333, l5: 0.321310, l6: 0.385072

[epoch:  47/100, batch:   200/  792, ite: 55996] train loss: 4.849771, tar: 0.505405 
l0: 0.547757, l1: 0.550722, l2: 0.550309, l3: 0.561193, l4: 0.612640, l5: 0.705674, l6: 0.816692

[epoch:  47/100, batch:   202/  792, ite: 55997] train loss: 4.849989, tar: 0.505426 
l0: 0.216578, l1: 0.221854, l2: 0.220939, l3: 0.239610, l4: 0.282545, l5: 0.339640, l6: 0.516030

[epoch:  47/100, batch:   204/  792, ite: 55998] train loss: 4.848863, tar: 0.505282 
l0: 0.880601, l1: 0.885700, l2: 0.881875, l3: 0.878840, l4: 0.877742, l5: 0.900569, l6: 1.033987

[epoch:  47/100, batch:   206/  792, ite: 55999] train loss: 4.850087, tar: 0.50

[epoch:  47/100, batch:   286/  792, ite: 56039] train loss: 4.592696, tar: 0.463081 
l0: 1.985745, l1: 2.013640, l2: 2.052301, l3: 2.088353, l4: 2.220737, l5: 2.363566, l6: 2.403057

[epoch:  47/100, batch:   288/  792, ite: 56040] train loss: 4.917197, tar: 0.501148 
l0: 0.431166, l1: 0.427891, l2: 0.432557, l3: 0.439820, l4: 0.445566, l5: 0.465041, l6: 0.590001

[epoch:  47/100, batch:   290/  792, ite: 56041] train loss: 4.891113, tar: 0.499441 
l0: 0.254048, l1: 0.259163, l2: 0.261066, l3: 0.266096, l4: 0.306068, l5: 0.420254, l6: 0.539657

[epoch:  47/100, batch:   292/  792, ite: 56042] train loss: 4.844664, tar: 0.493598 
l0: 0.306157, l1: 0.309004, l2: 0.306860, l3: 0.312222, l4: 0.334939, l5: 0.465264, l6: 0.562104

[epoch:  47/100, batch:   294/  792, ite: 56043] train loss: 4.805109, tar: 0.489239 
l0: 0.513993, l1: 0.519381, l2: 0.514060, l3: 0.520691, l4: 0.553771, l5: 0.620272, l6: 0.837575

[epoch:  47/100, batch:   296/  792, ite: 56044] train loss: 4.809080, tar: 0.48

[epoch:  47/100, batch:   376/  792, ite: 56084] train loss: 4.793344, tar: 0.493522 
l0: 0.463574, l1: 0.460639, l2: 0.462194, l3: 0.454324, l4: 0.481227, l5: 0.557177, l6: 0.656667

[epoch:  47/100, batch:   378/  792, ite: 56085] train loss: 4.787371, tar: 0.493170 
l0: 0.516708, l1: 0.521919, l2: 0.521760, l3: 0.533089, l4: 0.552606, l5: 0.586548, l6: 0.796993

[epoch:  47/100, batch:   380/  792, ite: 56086] train loss: 4.788395, tar: 0.493443 
l0: 0.486402, l1: 0.486509, l2: 0.487631, l3: 0.481877, l4: 0.502522, l5: 0.522416, l6: 0.715995

[epoch:  47/100, batch:   382/  792, ite: 56087] train loss: 4.785845, tar: 0.493362 
l0: 0.579836, l1: 0.579838, l2: 0.579036, l3: 0.582460, l4: 0.610054, l5: 0.683574, l6: 0.745823

[epoch:  47/100, batch:   384/  792, ite: 56088] train loss: 4.790040, tar: 0.494345 
l0: 0.486814, l1: 0.491977, l2: 0.490143, l3: 0.493999, l4: 0.539973, l5: 0.618038, l6: 0.784027

[epoch:  47/100, batch:   386/  792, ite: 56089] train loss: 4.789477, tar: 0.49

[epoch:  47/100, batch:   466/  792, ite: 56129] train loss: 4.786443, tar: 0.493813 
l0: 0.557101, l1: 0.558513, l2: 0.557280, l3: 0.558361, l4: 0.575592, l5: 0.731262, l6: 0.838473

[epoch:  47/100, batch:   468/  792, ite: 56130] train loss: 4.790164, tar: 0.494299 
l0: 0.637698, l1: 0.637893, l2: 0.633422, l3: 0.635379, l4: 0.654694, l5: 0.733150, l6: 0.821813

[epoch:  47/100, batch:   470/  792, ite: 56131] train loss: 4.797112, tar: 0.495394 
l0: 0.353251, l1: 0.359251, l2: 0.364059, l3: 0.368423, l4: 0.386048, l5: 0.427353, l6: 0.570647

[epoch:  47/100, batch:   472/  792, ite: 56132] train loss: 4.786783, tar: 0.494317 
l0: 0.245639, l1: 0.250489, l2: 0.248703, l3: 0.255940, l4: 0.283801, l5: 0.353559, l6: 0.569466

[epoch:  47/100, batch:   474/  792, ite: 56133] train loss: 4.771511, tar: 0.492448 
l0: 0.839739, l1: 0.844939, l2: 0.843452, l3: 0.841687, l4: 0.882706, l5: 0.899363, l6: 0.983868

[epoch:  47/100, batch:   476/  792, ite: 56134] train loss: 4.788933, tar: 0.49

[epoch:  47/100, batch:   556/  792, ite: 56174] train loss: 4.686984, tar: 0.481934 
l0: 0.589581, l1: 0.584983, l2: 0.580459, l3: 0.584493, l4: 0.649680, l5: 0.769540, l6: 0.858858

[epoch:  47/100, batch:   558/  792, ite: 56175] train loss: 4.692572, tar: 0.482549 
l0: 0.339621, l1: 0.337835, l2: 0.338883, l3: 0.338444, l4: 0.378422, l5: 0.484489, l6: 0.527099

[epoch:  47/100, batch:   560/  792, ite: 56176] train loss: 4.684508, tar: 0.481737 
l0: 0.341364, l1: 0.344382, l2: 0.346265, l3: 0.347625, l4: 0.385098, l5: 0.455387, l6: 0.630771

[epoch:  47/100, batch:   562/  792, ite: 56177] train loss: 4.677961, tar: 0.480944 
l0: 0.355983, l1: 0.357376, l2: 0.355965, l3: 0.354511, l4: 0.357985, l5: 0.468065, l6: 0.637656

[epoch:  47/100, batch:   564/  792, ite: 56178] train loss: 4.671507, tar: 0.480242 
l0: 0.344771, l1: 0.347170, l2: 0.348289, l3: 0.343076, l4: 0.383883, l5: 0.540038, l6: 0.678861

[epoch:  47/100, batch:   566/  792, ite: 56179] train loss: 4.666335, tar: 0.47

[epoch:  47/100, batch:   646/  792, ite: 56219] train loss: 4.675240, tar: 0.480967 
l0: 0.180436, l1: 0.181709, l2: 0.180207, l3: 0.181693, l4: 0.202361, l5: 0.274859, l6: 0.356259

[epoch:  47/100, batch:   648/  792, ite: 56220] train loss: 4.662672, tar: 0.479601 
l0: 0.496653, l1: 0.494469, l2: 0.497648, l3: 0.500399, l4: 0.519518, l5: 0.651805, l6: 0.746767

[epoch:  47/100, batch:   650/  792, ite: 56221] train loss: 4.662678, tar: 0.479679 
l0: 0.218265, l1: 0.220301, l2: 0.222587, l3: 0.236065, l4: 0.278226, l5: 0.365119, l6: 0.563816

[epoch:  47/100, batch:   652/  792, ite: 56222] train loss: 4.653614, tar: 0.478501 
l0: 0.443884, l1: 0.445843, l2: 0.444090, l3: 0.443259, l4: 0.475469, l5: 0.607233, l6: 0.838486

[epoch:  47/100, batch:   654/  792, ite: 56223] train loss: 4.653144, tar: 0.478346 
l0: 0.352070, l1: 0.356517, l2: 0.358798, l3: 0.356583, l4: 0.394481, l5: 0.472972, l6: 0.524868

[epoch:  47/100, batch:   656/  792, ite: 56224] train loss: 4.647690, tar: 0.47

[epoch:  47/100, batch:   736/  792, ite: 56264] train loss: 4.660789, tar: 0.480186 
l0: 0.339622, l1: 0.340167, l2: 0.341094, l3: 0.351298, l4: 0.378452, l5: 0.468910, l6: 0.637886

[epoch:  47/100, batch:   738/  792, ite: 56265] train loss: 4.656957, tar: 0.479655 
l0: 0.396078, l1: 0.402659, l2: 0.401930, l3: 0.416081, l4: 0.450098, l5: 0.577470, l6: 0.692078

[epoch:  47/100, batch:   740/  792, ite: 56266] train loss: 4.654944, tar: 0.479341 
l0: 0.565605, l1: 0.563388, l2: 0.567081, l3: 0.575540, l4: 0.608845, l5: 0.748712, l6: 1.029993

[epoch:  47/100, batch:   742/  792, ite: 56267] train loss: 4.659008, tar: 0.479664 
l0: 0.447280, l1: 0.450994, l2: 0.451853, l3: 0.447765, l4: 0.489002, l5: 0.637034, l6: 0.893902

[epoch:  47/100, batch:   744/  792, ite: 56268] train loss: 4.659294, tar: 0.479543 
l0: 0.323821, l1: 0.328501, l2: 0.330248, l3: 0.344088, l4: 0.374683, l5: 0.453371, l6: 0.552385

[epoch:  47/100, batch:   746/  792, ite: 56269] train loss: 4.654411, tar: 0.47

l0: 0.358698, l1: 0.358991, l2: 0.358587, l3: 0.359355, l4: 0.357544, l5: 0.398568, l6: 0.598944

[epoch:  48/100, batch:    34/  792, ite: 56309] train loss: 4.630002, tar: 0.475952 
l0: 0.510718, l1: 0.513646, l2: 0.513817, l3: 0.510708, l4: 0.494465, l5: 0.538024, l6: 0.759828

[epoch:  48/100, batch:    36/  792, ite: 56310] train loss: 4.630403, tar: 0.476064 
l0: 0.990030, l1: 0.977386, l2: 0.977435, l3: 0.979882, l4: 0.994538, l5: 1.026668, l6: 1.114937

[epoch:  48/100, batch:    38/  792, ite: 56311] train loss: 4.641957, tar: 0.477717 
l0: 0.527313, l1: 0.530976, l2: 0.527906, l3: 0.537502, l4: 0.575552, l5: 0.641564, l6: 0.797989

[epoch:  48/100, batch:    40/  792, ite: 56312] train loss: 4.643363, tar: 0.477876 
l0: 0.278987, l1: 0.280624, l2: 0.274217, l3: 0.274878, l4: 0.312322, l5: 0.401367, l6: 0.462338

[epoch:  48/100, batch:    42/  792, ite: 56313] train loss: 4.637535, tar: 0.477240 
l0: 0.594930, l1: 0.596953, l2: 0.597764, l3: 0.596231, l4: 0.610525, l5: 0.6843

[epoch:  48/100, batch:   122/  792, ite: 56353] train loss: 4.734628, tar: 0.489505 
l0: 0.437930, l1: 0.443994, l2: 0.449785, l3: 0.469548, l4: 0.491827, l5: 0.618076, l6: 0.777874

[epoch:  48/100, batch:   124/  792, ite: 56354] train loss: 4.734151, tar: 0.489359 
l0: 0.260315, l1: 0.263501, l2: 0.261807, l3: 0.272199, l4: 0.293378, l5: 0.458575, l6: 0.643233

[epoch:  48/100, batch:   126/  792, ite: 56355] train loss: 4.729517, tar: 0.488714 
l0: 0.251450, l1: 0.252637, l2: 0.253137, l3: 0.261421, l4: 0.285382, l5: 0.374872, l6: 0.472036

[epoch:  48/100, batch:   128/  792, ite: 56356] train loss: 4.723644, tar: 0.488048 
l0: 0.276898, l1: 0.283889, l2: 0.282471, l3: 0.280161, l4: 0.290833, l5: 0.406223, l6: 0.530415

[epoch:  48/100, batch:   130/  792, ite: 56357] train loss: 4.718673, tar: 0.487456 
l0: 0.320109, l1: 0.321499, l2: 0.323072, l3: 0.335397, l4: 0.366396, l5: 0.492800, l6: 0.572414

[epoch:  48/100, batch:   132/  792, ite: 56358] train loss: 4.714845, tar: 0.48

[epoch:  48/100, batch:   212/  792, ite: 56398] train loss: 4.719950, tar: 0.487833 
l0: 0.443139, l1: 0.445212, l2: 0.448200, l3: 0.451434, l4: 0.437196, l5: 0.534875, l6: 0.637541

[epoch:  48/100, batch:   214/  792, ite: 56399] train loss: 4.718271, tar: 0.487721 
l0: 0.254695, l1: 0.260548, l2: 0.259141, l3: 0.265963, l4: 0.298232, l5: 0.411139, l6: 0.528126

[epoch:  48/100, batch:   216/  792, ite: 56400] train loss: 4.713631, tar: 0.487138 
l0: 0.678157, l1: 0.671955, l2: 0.669631, l3: 0.667648, l4: 0.655654, l5: 0.724969, l6: 0.878364

[epoch:  48/100, batch:   218/  792, ite: 56401] train loss: 4.716388, tar: 0.487615 
l0: 0.375561, l1: 0.381479, l2: 0.380869, l3: 0.383523, l4: 0.357060, l5: 0.463025, l6: 0.643101

[epoch:  48/100, batch:   220/  792, ite: 56402] train loss: 4.713786, tar: 0.487336 
l0: 0.298475, l1: 0.304567, l2: 0.303527, l3: 0.314044, l4: 0.345783, l5: 0.518780, l6: 0.736300

[epoch:  48/100, batch:   222/  792, ite: 56403] train loss: 4.711119, tar: 0.48

[epoch:  48/100, batch:   302/  792, ite: 56443] train loss: 4.736633, tar: 0.489949 
l0: 0.236997, l1: 0.244387, l2: 0.246050, l3: 0.250102, l4: 0.257286, l5: 0.366105, l6: 0.513575

[epoch:  48/100, batch:   304/  792, ite: 56444] train loss: 4.732153, tar: 0.489379 
l0: 0.607619, l1: 0.611862, l2: 0.612339, l3: 0.625183, l4: 0.622226, l5: 0.755106, l6: 1.056638

[epoch:  48/100, batch:   306/  792, ite: 56445] train loss: 4.734885, tar: 0.489645 
l0: 0.648395, l1: 0.653185, l2: 0.650063, l3: 0.654873, l4: 0.688310, l5: 0.770223, l6: 0.882098

[epoch:  48/100, batch:   308/  792, ite: 56446] train loss: 4.737522, tar: 0.490000 
l0: 0.453327, l1: 0.465225, l2: 0.469737, l3: 0.475228, l4: 0.489764, l5: 0.678963, l6: 0.732590

[epoch:  48/100, batch:   310/  792, ite: 56447] train loss: 4.737013, tar: 0.489918 
l0: 0.363002, l1: 0.364464, l2: 0.366633, l3: 0.371004, l4: 0.402739, l5: 0.507371, l6: 0.705752

[epoch:  48/100, batch:   312/  792, ite: 56448] train loss: 4.734939, tar: 0.48

[epoch:  48/100, batch:   392/  792, ite: 56488] train loss: 4.746092, tar: 0.490865 
l0: 0.258100, l1: 0.262813, l2: 0.264363, l3: 0.267991, l4: 0.346493, l5: 0.446186, l6: 0.541561

[epoch:  48/100, batch:   394/  792, ite: 56489] train loss: 4.742484, tar: 0.490389 
l0: 0.328506, l1: 0.328991, l2: 0.335229, l3: 0.349302, l4: 0.361113, l5: 0.475932, l6: 0.627253

[epoch:  48/100, batch:   396/  792, ite: 56490] train loss: 4.740089, tar: 0.490059 
l0: 0.326133, l1: 0.329477, l2: 0.330926, l3: 0.327681, l4: 0.366093, l5: 0.470136, l6: 0.570212

[epoch:  48/100, batch:   398/  792, ite: 56491] train loss: 4.737303, tar: 0.489725 
l0: 0.732873, l1: 0.733598, l2: 0.737773, l3: 0.736263, l4: 0.754862, l5: 0.801260, l6: 0.855400

[epoch:  48/100, batch:   400/  792, ite: 56492] train loss: 4.740348, tar: 0.490219 
l0: 0.311218, l1: 0.311766, l2: 0.314487, l3: 0.317634, l4: 0.342746, l5: 0.411603, l6: 0.543824

[epoch:  48/100, batch:   402/  792, ite: 56493] train loss: 4.737040, tar: 0.48

[epoch:  48/100, batch:   482/  792, ite: 56533] train loss: 4.718732, tar: 0.487735 
l0: 0.204526, l1: 0.209555, l2: 0.204144, l3: 0.210040, l4: 0.252680, l5: 0.366259, l6: 0.618730

[epoch:  48/100, batch:   484/  792, ite: 56534] train loss: 4.715014, tar: 0.487205 
l0: 0.301655, l1: 0.304727, l2: 0.303319, l3: 0.312440, l4: 0.336563, l5: 0.418033, l6: 0.572673

[epoch:  48/100, batch:   486/  792, ite: 56535] train loss: 4.712280, tar: 0.486858 
l0: 0.731349, l1: 0.740295, l2: 0.741254, l3: 0.744327, l4: 0.751720, l5: 0.775616, l6: 0.777354

[epoch:  48/100, batch:   488/  792, ite: 56536] train loss: 4.714965, tar: 0.487314 
l0: 0.440744, l1: 0.448729, l2: 0.448786, l3: 0.451173, l4: 0.466940, l5: 0.548745, l6: 0.589026

[epoch:  48/100, batch:   490/  792, ite: 56537] train loss: 4.713690, tar: 0.487228 
l0: 0.230416, l1: 0.236465, l2: 0.231803, l3: 0.235669, l4: 0.253141, l5: 0.356483, l6: 0.376715

[epoch:  48/100, batch:   492/  792, ite: 56538] train loss: 4.709424, tar: 0.48

[epoch:  48/100, batch:   572/  792, ite: 56578] train loss: 4.695916, tar: 0.484664 
l0: 0.513204, l1: 0.514675, l2: 0.517262, l3: 0.524472, l4: 0.558393, l5: 0.668159, l6: 0.871764

[epoch:  48/100, batch:   574/  792, ite: 56579] train loss: 4.696825, tar: 0.484713 
l0: 0.406679, l1: 0.407667, l2: 0.409160, l3: 0.404606, l4: 0.443162, l5: 0.590523, l6: 0.772001

[epoch:  48/100, batch:   576/  792, ite: 56580] train loss: 4.696064, tar: 0.484579 
l0: 0.551631, l1: 0.564027, l2: 0.559096, l3: 0.560215, l4: 0.599880, l5: 0.593716, l6: 0.732812

[epoch:  48/100, batch:   578/  792, ite: 56581] train loss: 4.696601, tar: 0.484694 
l0: 0.358812, l1: 0.366010, l2: 0.366609, l3: 0.373970, l4: 0.397181, l5: 0.458610, l6: 0.530997

[epoch:  48/100, batch:   580/  792, ite: 56582] train loss: 4.694435, tar: 0.484478 
l0: 0.435437, l1: 0.437156, l2: 0.434292, l3: 0.437633, l4: 0.470925, l5: 0.546348, l6: 0.718668

[epoch:  48/100, batch:   582/  792, ite: 56583] train loss: 4.693562, tar: 0.48

[epoch:  48/100, batch:   662/  792, ite: 56623] train loss: 4.732271, tar: 0.489514 
l0: 0.483801, l1: 0.489982, l2: 0.489261, l3: 0.488364, l4: 0.512468, l5: 0.660066, l6: 1.073389

[epoch:  48/100, batch:   664/  792, ite: 56624] train loss: 4.732875, tar: 0.489504 
l0: 0.496238, l1: 0.506713, l2: 0.509793, l3: 0.518276, l4: 0.528352, l5: 0.606335, l6: 0.820294

[epoch:  48/100, batch:   666/  792, ite: 56625] train loss: 4.733312, tar: 0.489515 
l0: 0.602485, l1: 0.610347, l2: 0.614769, l3: 0.619141, l4: 0.633931, l5: 0.671031, l6: 1.026470

[epoch:  48/100, batch:   668/  792, ite: 56626] train loss: 4.734960, tar: 0.489696 
l0: 0.590230, l1: 0.600005, l2: 0.604591, l3: 0.609054, l4: 0.634025, l5: 0.767319, l6: 0.897012

[epoch:  48/100, batch:   670/  792, ite: 56627] train loss: 4.736317, tar: 0.489856 
l0: 0.552752, l1: 0.558224, l2: 0.556967, l3: 0.557189, l4: 0.571941, l5: 0.676817, l6: 0.888923

[epoch:  48/100, batch:   672/  792, ite: 56628] train loss: 4.737202, tar: 0.48

[epoch:  48/100, batch:   752/  792, ite: 56668] train loss: 4.717758, tar: 0.487472 
l0: 0.774606, l1: 0.786819, l2: 0.789024, l3: 0.792680, l4: 0.796734, l5: 0.856560, l6: 1.002756

[epoch:  48/100, batch:   754/  792, ite: 56669] train loss: 4.720885, tar: 0.487901 
l0: 0.368185, l1: 0.368608, l2: 0.372119, l3: 0.387352, l4: 0.403362, l5: 0.508372, l6: 0.527757

[epoch:  48/100, batch:   756/  792, ite: 56670] train loss: 4.719071, tar: 0.487722 
l0: 0.597436, l1: 0.600244, l2: 0.602706, l3: 0.598271, l4: 0.618958, l5: 0.769507, l6: 0.846027

[epoch:  48/100, batch:   758/  792, ite: 56671] train loss: 4.720294, tar: 0.487886 
l0: 0.359164, l1: 0.362836, l2: 0.362814, l3: 0.363580, l4: 0.410431, l5: 0.611861, l6: 0.820271

[epoch:  48/100, batch:   760/  792, ite: 56672] train loss: 4.719301, tar: 0.487694 
l0: 0.359724, l1: 0.357615, l2: 0.361996, l3: 0.367652, l4: 0.382848, l5: 0.442136, l6: 0.475691

[epoch:  48/100, batch:   762/  792, ite: 56673] train loss: 4.717158, tar: 0.48

l0: 0.767323, l1: 0.767754, l2: 0.768010, l3: 0.778847, l4: 0.771797, l5: 0.760612, l6: 1.012531

[epoch:  49/100, batch:    50/  792, ite: 56713] train loss: 4.741955, tar: 0.490182 
l0: 0.560841, l1: 0.562921, l2: 0.560282, l3: 0.566724, l4: 0.582149, l5: 0.639898, l6: 0.808094

[epoch:  49/100, batch:    52/  792, ite: 56714] train loss: 4.742582, tar: 0.490281 
l0: 0.378997, l1: 0.386230, l2: 0.383851, l3: 0.398804, l4: 0.438661, l5: 0.504746, l6: 0.552389

[epoch:  49/100, batch:    54/  792, ite: 56715] train loss: 4.741020, tar: 0.490126 
l0: 0.288018, l1: 0.288955, l2: 0.285989, l3: 0.292006, l4: 0.312572, l5: 0.351131, l6: 0.522555

[epoch:  49/100, batch:    56/  792, ite: 56716] train loss: 4.738560, tar: 0.489843 
l0: 0.324409, l1: 0.323586, l2: 0.317169, l3: 0.315622, l4: 0.319621, l5: 0.450148, l6: 0.551248

[epoch:  49/100, batch:    58/  792, ite: 56717] train loss: 4.736435, tar: 0.489613 
l0: 0.519047, l1: 0.522494, l2: 0.525833, l3: 0.530668, l4: 0.565105, l5: 0.6636

[epoch:  49/100, batch:   138/  792, ite: 56757] train loss: 4.749678, tar: 0.491027 
l0: 0.680643, l1: 0.679718, l2: 0.677389, l3: 0.676095, l4: 0.699207, l5: 0.750926, l6: 0.929053

[epoch:  49/100, batch:   140/  792, ite: 56758] train loss: 4.751404, tar: 0.491278 
l0: 0.316725, l1: 0.314195, l2: 0.312787, l3: 0.317468, l4: 0.358006, l5: 0.496788, l6: 0.638938

[epoch:  49/100, batch:   142/  792, ite: 56759] train loss: 4.749710, tar: 0.491048 
l0: 0.311821, l1: 0.308592, l2: 0.304723, l3: 0.306164, l4: 0.320143, l5: 0.356424, l6: 0.484904

[epoch:  49/100, batch:   144/  792, ite: 56760] train loss: 4.747271, tar: 0.490812 
l0: 0.970614, l1: 0.976302, l2: 0.982486, l3: 1.000053, l4: 1.005479, l5: 1.167449, l6: 1.756362

[epoch:  49/100, batch:   146/  792, ite: 56761] train loss: 4.753422, tar: 0.491442 
l0: 0.529528, l1: 0.531925, l2: 0.529691, l3: 0.523065, l4: 0.546245, l5: 0.605425, l6: 0.732909

[epoch:  49/100, batch:   148/  792, ite: 56762] train loss: 4.753485, tar: 0.49

[epoch:  49/100, batch:   228/  792, ite: 56802] train loss: 4.753593, tar: 0.491646 
l0: 0.311817, l1: 0.314667, l2: 0.312760, l3: 0.313617, l4: 0.337605, l5: 0.490555, l6: 0.667153

[epoch:  49/100, batch:   230/  792, ite: 56803] train loss: 4.752039, tar: 0.491422 
l0: 0.520687, l1: 0.523312, l2: 0.522489, l3: 0.539330, l4: 0.559036, l5: 0.612520, l6: 0.791889

[epoch:  49/100, batch:   232/  792, ite: 56804] train loss: 4.752289, tar: 0.491458 
l0: 0.553226, l1: 0.542839, l2: 0.543914, l3: 0.544891, l4: 0.557303, l5: 0.606579, l6: 0.671181

[epoch:  49/100, batch:   234/  792, ite: 56805] train loss: 4.752329, tar: 0.491535 
l0: 0.581792, l1: 0.589759, l2: 0.596412, l3: 0.597811, l4: 0.599561, l5: 0.636470, l6: 0.875367

[epoch:  49/100, batch:   236/  792, ite: 56806] train loss: 4.753116, tar: 0.491647 
l0: 0.430728, l1: 0.431131, l2: 0.430853, l3: 0.428896, l4: 0.456437, l5: 0.568201, l6: 0.690540

[epoch:  49/100, batch:   238/  792, ite: 56807] train loss: 4.752509, tar: 0.49

[epoch:  49/100, batch:   318/  792, ite: 56847] train loss: 4.755566, tar: 0.492098 
l0: 0.284685, l1: 0.288406, l2: 0.288519, l3: 0.302319, l4: 0.341861, l5: 0.498187, l6: 0.617021

[epoch:  49/100, batch:   320/  792, ite: 56848] train loss: 4.753884, tar: 0.491853 
l0: 0.458943, l1: 0.461042, l2: 0.459300, l3: 0.446784, l4: 0.469598, l5: 0.576477, l6: 0.768262

[epoch:  49/100, batch:   322/  792, ite: 56849] train loss: 4.753516, tar: 0.491815 
l0: 0.513078, l1: 0.514263, l2: 0.514040, l3: 0.514204, l4: 0.532342, l5: 0.562047, l6: 0.703928

[epoch:  49/100, batch:   324/  792, ite: 56850] train loss: 4.753483, tar: 0.491840 
l0: 0.347137, l1: 0.340931, l2: 0.341593, l3: 0.345983, l4: 0.384311, l5: 0.452274, l6: 0.554737

[epoch:  49/100, batch:   326/  792, ite: 56851] train loss: 4.751934, tar: 0.491670 
l0: 0.241321, l1: 0.249702, l2: 0.252648, l3: 0.257269, l4: 0.297782, l5: 0.386463, l6: 0.530072

[epoch:  49/100, batch:   328/  792, ite: 56852] train loss: 4.749654, tar: 0.49

[epoch:  49/100, batch:   408/  792, ite: 56892] train loss: 4.744654, tar: 0.490802 
l0: 0.378140, l1: 0.382389, l2: 0.382111, l3: 0.395839, l4: 0.414560, l5: 0.516308, l6: 0.678249

[epoch:  49/100, batch:   410/  792, ite: 56893] train loss: 4.743764, tar: 0.490676 
l0: 0.213633, l1: 0.215653, l2: 0.216423, l3: 0.218655, l4: 0.258777, l5: 0.347355, l6: 0.479114

[epoch:  49/100, batch:   412/  792, ite: 56894] train loss: 4.741264, tar: 0.490366 
l0: 0.661290, l1: 0.669137, l2: 0.670616, l3: 0.670302, l4: 0.708256, l5: 0.854605, l6: 0.971192

[epoch:  49/100, batch:   414/  792, ite: 56895] train loss: 4.742988, tar: 0.490557 
l0: 0.598716, l1: 0.603495, l2: 0.603710, l3: 0.595623, l4: 0.645644, l5: 0.703882, l6: 0.929754

[epoch:  49/100, batch:   416/  792, ite: 56896] train loss: 4.744023, tar: 0.490677 
l0: 0.452908, l1: 0.454491, l2: 0.455243, l3: 0.465024, l4: 0.502138, l5: 0.509997, l6: 0.554121

[epoch:  49/100, batch:   418/  792, ite: 56897] train loss: 4.743254, tar: 0.49

[epoch:  49/100, batch:   498/  792, ite: 56937] train loss: 4.736859, tar: 0.489534 
l0: 0.255071, l1: 0.257492, l2: 0.256983, l3: 0.263325, l4: 0.299609, l5: 0.438710, l6: 0.515290

[epoch:  49/100, batch:   500/  792, ite: 56938] train loss: 4.734787, tar: 0.489284 
l0: 0.473027, l1: 0.477101, l2: 0.480636, l3: 0.488899, l4: 0.540710, l5: 0.694679, l6: 1.009105

[epoch:  49/100, batch:   502/  792, ite: 56939] train loss: 4.735299, tar: 0.489267 
l0: 0.817825, l1: 0.824197, l2: 0.816017, l3: 0.824817, l4: 0.795414, l5: 0.751320, l6: 0.852723

[epoch:  49/100, batch:   504/  792, ite: 56940] train loss: 4.737313, tar: 0.489616 
l0: 0.513078, l1: 0.510556, l2: 0.512644, l3: 0.514292, l4: 0.545891, l5: 0.597795, l6: 0.834426

[epoch:  49/100, batch:   506/  792, ite: 56941] train loss: 4.737537, tar: 0.489641 
l0: 0.274411, l1: 0.287221, l2: 0.287691, l3: 0.293921, l4: 0.336966, l5: 0.479530, l6: 0.618868

[epoch:  49/100, batch:   508/  792, ite: 56942] train loss: 4.735961, tar: 0.48

[epoch:  49/100, batch:   588/  792, ite: 56982] train loss: 4.723133, tar: 0.488135 
l0: 0.281514, l1: 0.286844, l2: 0.287805, l3: 0.297650, l4: 0.322772, l5: 0.446589, l6: 0.611717

[epoch:  49/100, batch:   590/  792, ite: 56983] train loss: 4.721610, tar: 0.487925 
l0: 0.429404, l1: 0.429975, l2: 0.430957, l3: 0.444615, l4: 0.474125, l5: 0.625680, l6: 0.876509

[epoch:  49/100, batch:   592/  792, ite: 56984] train loss: 4.721542, tar: 0.487866 
l0: 0.478461, l1: 0.493142, l2: 0.493575, l3: 0.507507, l4: 0.537515, l5: 0.628249, l6: 0.926509

[epoch:  49/100, batch:   594/  792, ite: 56985] train loss: 4.721958, tar: 0.487856 
l0: 0.409832, l1: 0.421195, l2: 0.419728, l3: 0.421608, l4: 0.436633, l5: 0.543341, l6: 0.726267

[epoch:  49/100, batch:   596/  792, ite: 56986] train loss: 4.721365, tar: 0.487777 
l0: 0.367564, l1: 0.374354, l2: 0.376901, l3: 0.377109, l4: 0.386199, l5: 0.537734, l6: 0.611626

[epoch:  49/100, batch:   598/  792, ite: 56987] train loss: 4.720447, tar: 0.48

[epoch:  49/100, batch:   678/  792, ite: 57027] train loss: 4.715114, tar: 0.487366 
l0: 0.379505, l1: 0.382668, l2: 0.384894, l3: 0.385184, l4: 0.408697, l5: 0.461965, l6: 0.574849

[epoch:  49/100, batch:   680/  792, ite: 57028] train loss: 4.714064, tar: 0.487261 
l0: 0.402222, l1: 0.407184, l2: 0.408065, l3: 0.423274, l4: 0.436309, l5: 0.530874, l6: 0.707554

[epoch:  49/100, batch:   682/  792, ite: 57029] train loss: 4.713524, tar: 0.487178 
l0: 0.216263, l1: 0.225507, l2: 0.227292, l3: 0.227615, l4: 0.259380, l5: 0.338063, l6: 0.509634

[epoch:  49/100, batch:   684/  792, ite: 57030] train loss: 4.711457, tar: 0.486915 
l0: 0.495578, l1: 0.501207, l2: 0.505073, l3: 0.512262, l4: 0.537901, l5: 0.611944, l6: 0.734130

[epoch:  49/100, batch:   686/  792, ite: 57031] train loss: 4.711444, tar: 0.486924 
l0: 0.267455, l1: 0.271542, l2: 0.271811, l3: 0.275195, l4: 0.316405, l5: 0.437942, l6: 0.617802

[epoch:  49/100, batch:   688/  792, ite: 57032] train loss: 4.709897, tar: 0.48

[epoch:  49/100, batch:   768/  792, ite: 57072] train loss: 4.714129, tar: 0.487403 
l0: 0.456984, l1: 0.463360, l2: 0.467354, l3: 0.463340, l4: 0.496249, l5: 0.665437, l6: 0.790163

[epoch:  49/100, batch:   770/  792, ite: 57073] train loss: 4.714052, tar: 0.487375 
l0: 0.506837, l1: 0.521855, l2: 0.508923, l3: 0.501780, l4: 0.504513, l5: 0.600972, l6: 1.042882

[epoch:  49/100, batch:   772/  792, ite: 57074] train loss: 4.714521, tar: 0.487393 
l0: 0.387896, l1: 0.395076, l2: 0.395032, l3: 0.402851, l4: 0.407129, l5: 0.567042, l6: 0.714692

[epoch:  49/100, batch:   774/  792, ite: 57075] train loss: 4.713843, tar: 0.487301 
l0: 0.448971, l1: 0.457248, l2: 0.453969, l3: 0.449216, l4: 0.476967, l5: 0.547072, l6: 0.660249

[epoch:  49/100, batch:   776/  792, ite: 57076] train loss: 4.713389, tar: 0.487265 
l0: 0.580104, l1: 0.590451, l2: 0.593934, l3: 0.587234, l4: 0.597542, l5: 0.694222, l6: 0.835741

[epoch:  49/100, batch:   778/  792, ite: 57077] train loss: 4.713896, tar: 0.48

l0: 0.563117, l1: 0.555968, l2: 0.552720, l3: 0.550567, l4: 0.564532, l5: 0.669326, l6: 1.333649

[epoch:  50/100, batch:    66/  792, ite: 57117] train loss: 4.712567, tar: 0.487031 
l0: 0.331562, l1: 0.335063, l2: 0.334125, l3: 0.333247, l4: 0.355990, l5: 0.439968, l6: 0.503208

[epoch:  50/100, batch:    68/  792, ite: 57118] train loss: 4.711167, tar: 0.486892 
l0: 0.547091, l1: 0.554259, l2: 0.554192, l3: 0.552200, l4: 0.572199, l5: 0.605737, l6: 0.691232

[epoch:  50/100, batch:    70/  792, ite: 57119] train loss: 4.711244, tar: 0.486945 
l0: 1.038958, l1: 1.051941, l2: 1.057811, l3: 1.075000, l4: 1.089838, l5: 1.040245, l6: 0.869338

[epoch:  50/100, batch:    72/  792, ite: 57120] train loss: 4.714353, tar: 0.487438 
l0: 0.381544, l1: 0.378813, l2: 0.381516, l3: 0.387228, l4: 0.388196, l5: 0.434629, l6: 0.576325

[epoch:  50/100, batch:    74/  792, ite: 57121] train loss: 4.713288, tar: 0.487344 
l0: 0.795388, l1: 0.803343, l2: 0.800943, l3: 0.789612, l4: 0.800189, l5: 0.9191

[epoch:  50/100, batch:   154/  792, ite: 57161] train loss: 4.709894, tar: 0.487078 
l0: 0.593357, l1: 0.599774, l2: 0.601943, l3: 0.604432, l4: 0.575176, l5: 0.680911, l6: 0.813598

[epoch:  50/100, batch:   156/  792, ite: 57162] train loss: 4.710446, tar: 0.487170 
l0: 0.637214, l1: 0.636295, l2: 0.638331, l3: 0.643139, l4: 0.669225, l5: 0.752518, l6: 1.079395

[epoch:  50/100, batch:   158/  792, ite: 57163] train loss: 4.711803, tar: 0.487299 
l0: 0.432733, l1: 0.443063, l2: 0.444812, l3: 0.451974, l4: 0.455379, l5: 0.506352, l6: 0.582854

[epoch:  50/100, batch:   160/  792, ite: 57164] train loss: 4.711233, tar: 0.487252 
l0: 0.465076, l1: 0.465963, l2: 0.463015, l3: 0.468486, l4: 0.508578, l5: 0.652373, l6: 0.737452

[epoch:  50/100, batch:   162/  792, ite: 57165] train loss: 4.711115, tar: 0.487233 
l0: 0.362353, l1: 0.360474, l2: 0.361228, l3: 0.363146, l4: 0.398906, l5: 0.529163, l6: 0.621885

[epoch:  50/100, batch:   164/  792, ite: 57166] train loss: 4.710220, tar: 0.48

[epoch:  50/100, batch:   244/  792, ite: 57206] train loss: 4.719642, tar: 0.488083 
l0: 0.539894, l1: 0.539617, l2: 0.532439, l3: 0.524675, l4: 0.525524, l5: 0.581102, l6: 0.613020

[epoch:  50/100, batch:   246/  792, ite: 57207] train loss: 4.719470, tar: 0.488126 
l0: 0.653734, l1: 0.652322, l2: 0.644907, l3: 0.651487, l4: 0.669599, l5: 0.823219, l6: 1.123055

[epoch:  50/100, batch:   248/  792, ite: 57208] train loss: 4.720803, tar: 0.488263 
l0: 0.235507, l1: 0.240617, l2: 0.245763, l3: 0.253520, l4: 0.285843, l5: 0.387720, l6: 0.502323

[epoch:  50/100, batch:   250/  792, ite: 57209] train loss: 4.719071, tar: 0.488054 
l0: 0.455012, l1: 0.458287, l2: 0.461266, l3: 0.465732, l4: 0.492681, l5: 0.599676, l6: 0.685237

[epoch:  50/100, batch:   252/  792, ite: 57210] train loss: 4.719041, tar: 0.488026 
l0: 0.308627, l1: 0.305860, l2: 0.305407, l3: 0.308563, l4: 0.322195, l5: 0.409879, l6: 0.478790

[epoch:  50/100, batch:   254/  792, ite: 57211] train loss: 4.717569, tar: 0.48

[epoch:  50/100, batch:   334/  792, ite: 57251] train loss: 4.731941, tar: 0.489828 
l0: 0.361090, l1: 0.367325, l2: 0.369272, l3: 0.369464, l4: 0.396934, l5: 0.560383, l6: 0.667346

[epoch:  50/100, batch:   336/  792, ite: 57252] train loss: 4.731189, tar: 0.489725 
l0: 0.524521, l1: 0.528710, l2: 0.522664, l3: 0.522862, l4: 0.555018, l5: 0.644384, l6: 0.773533

[epoch:  50/100, batch:   338/  792, ite: 57253] train loss: 4.731313, tar: 0.489753 
l0: 0.523272, l1: 0.527809, l2: 0.528113, l3: 0.523508, l4: 0.530032, l5: 0.615159, l6: 0.874886

[epoch:  50/100, batch:   340/  792, ite: 57254] train loss: 4.731537, tar: 0.489779 
l0: 0.617229, l1: 0.617750, l2: 0.620693, l3: 0.636439, l4: 0.630272, l5: 0.647767, l6: 0.585802

[epoch:  50/100, batch:   342/  792, ite: 57255] train loss: 4.731785, tar: 0.489881 
l0: 0.529402, l1: 0.534339, l2: 0.532856, l3: 0.545936, l4: 0.571036, l5: 0.619488, l6: 0.889342

[epoch:  50/100, batch:   344/  792, ite: 57256] train loss: 4.732138, tar: 0.48

[epoch:  50/100, batch:   424/  792, ite: 57296] train loss: 4.723322, tar: 0.488864 
l0: 0.327058, l1: 0.328955, l2: 0.327361, l3: 0.327690, l4: 0.346666, l5: 0.515521, l6: 0.655353

[epoch:  50/100, batch:   426/  792, ite: 57297] train loss: 4.722378, tar: 0.488739 
l0: 0.322605, l1: 0.322902, l2: 0.324206, l3: 0.336506, l4: 0.368532, l5: 0.478209, l6: 0.748374

[epoch:  50/100, batch:   428/  792, ite: 57298] train loss: 4.721584, tar: 0.488611 
l0: 0.319576, l1: 0.319347, l2: 0.321096, l3: 0.329001, l4: 0.364030, l5: 0.445349, l6: 0.673273

[epoch:  50/100, batch:   430/  792, ite: 57299] train loss: 4.720677, tar: 0.488481 
l0: 0.285843, l1: 0.283448, l2: 0.278958, l3: 0.276058, l4: 0.303866, l5: 0.385817, l6: 0.484348

[epoch:  50/100, batch:   432/  792, ite: 57300] train loss: 4.719232, tar: 0.488325 
l0: 0.380012, l1: 0.385886, l2: 0.388004, l3: 0.393500, l4: 0.433560, l5: 0.477460, l6: 0.609119

[epoch:  50/100, batch:   434/  792, ite: 57301] train loss: 4.718473, tar: 0.48

[epoch:  50/100, batch:   514/  792, ite: 57341] train loss: 4.714627, tar: 0.487772 
l0: 0.562299, l1: 0.564040, l2: 0.564788, l3: 0.562901, l4: 0.584144, l5: 0.724267, l6: 0.921881

[epoch:  50/100, batch:   516/  792, ite: 57342] train loss: 4.715218, tar: 0.487827 
l0: 0.327083, l1: 0.329013, l2: 0.328411, l3: 0.332297, l4: 0.383885, l5: 0.500812, l6: 0.649340

[epoch:  50/100, batch:   518/  792, ite: 57343] train loss: 4.714349, tar: 0.487708 
l0: 0.662161, l1: 0.662696, l2: 0.660709, l3: 0.668258, l4: 0.699147, l5: 0.852987, l6: 1.020170

[epoch:  50/100, batch:   520/  792, ite: 57344] train loss: 4.715541, tar: 0.487837 
l0: 0.314764, l1: 0.318106, l2: 0.321040, l3: 0.323385, l4: 0.366193, l5: 0.478987, l6: 0.665926

[epoch:  50/100, batch:   522/  792, ite: 57345] train loss: 4.714648, tar: 0.487709 
l0: 0.356902, l1: 0.363546, l2: 0.366533, l3: 0.343153, l4: 0.372630, l5: 0.495021, l6: 0.705202

[epoch:  50/100, batch:   524/  792, ite: 57346] train loss: 4.713950, tar: 0.48

[epoch:  50/100, batch:   604/  792, ite: 57386] train loss: 4.710174, tar: 0.487080 
l0: 0.519229, l1: 0.524854, l2: 0.528936, l3: 0.539117, l4: 0.567473, l5: 0.575669, l6: 0.692320

[epoch:  50/100, batch:   606/  792, ite: 57387] train loss: 4.710169, tar: 0.487103 
l0: 0.583851, l1: 0.590297, l2: 0.591139, l3: 0.594898, l4: 0.620350, l5: 0.705542, l6: 1.002482

[epoch:  50/100, batch:   608/  792, ite: 57388] train loss: 4.710935, tar: 0.487172 
l0: 0.506092, l1: 0.500568, l2: 0.504142, l3: 0.508627, l4: 0.505840, l5: 0.551922, l6: 0.657394

[epoch:  50/100, batch:   610/  792, ite: 57389] train loss: 4.710737, tar: 0.487186 
l0: 0.562681, l1: 0.562031, l2: 0.563167, l3: 0.570411, l4: 0.609719, l5: 0.838229, l6: 0.958084

[epoch:  50/100, batch:   612/  792, ite: 57390] train loss: 4.711427, tar: 0.487240 
l0: 0.578282, l1: 0.583007, l2: 0.583128, l3: 0.588545, l4: 0.636409, l5: 0.742897, l6: 1.023569

[epoch:  50/100, batch:   614/  792, ite: 57391] train loss: 4.712156, tar: 0.48

[epoch:  50/100, batch:   694/  792, ite: 57431] train loss: 4.709922, tar: 0.486960 
l0: 0.300877, l1: 0.308413, l2: 0.314511, l3: 0.325251, l4: 0.357453, l5: 0.452543, l6: 0.646828

[epoch:  50/100, batch:   696/  792, ite: 57432] train loss: 4.709002, tar: 0.486830 
l0: 0.483170, l1: 0.482080, l2: 0.481137, l3: 0.482149, l4: 0.511093, l5: 0.540861, l6: 0.757056

[epoch:  50/100, batch:   698/  792, ite: 57433] train loss: 4.708926, tar: 0.486827 
l0: 0.355545, l1: 0.359072, l2: 0.359811, l3: 0.364818, l4: 0.394774, l5: 0.465189, l6: 0.650050

[epoch:  50/100, batch:   700/  792, ite: 57434] train loss: 4.708243, tar: 0.486736 
l0: 0.669198, l1: 0.670157, l2: 0.670206, l3: 0.671236, l4: 0.744395, l5: 0.912620, l6: 1.174000

[epoch:  50/100, batch:   702/  792, ite: 57435] train loss: 4.709609, tar: 0.486863 
l0: 0.805826, l1: 0.825478, l2: 0.824371, l3: 0.817497, l4: 0.813849, l5: 0.904890, l6: 1.057947

[epoch:  50/100, batch:   704/  792, ite: 57436] train loss: 4.711404, tar: 0.48

[epoch:  50/100, batch:   784/  792, ite: 57476] train loss: 4.709192, tar: 0.486552 
l0: 0.466233, l1: 0.465785, l2: 0.464682, l3: 0.478942, l4: 0.532066, l5: 0.607439, l6: 0.682502

[epoch:  50/100, batch:   786/  792, ite: 57477] train loss: 4.709094, tar: 0.486538 
l0: 0.581764, l1: 0.589296, l2: 0.586444, l3: 0.591736, l4: 0.604054, l5: 0.667461, l6: 0.899311

[epoch:  50/100, batch:   788/  792, ite: 57478] train loss: 4.709709, tar: 0.486602 
l0: 0.464730, l1: 0.471636, l2: 0.468902, l3: 0.470897, l4: 0.519526, l5: 0.668407, l6: 0.845630

[epoch:  50/100, batch:   790/  792, ite: 57479] train loss: 4.709773, tar: 0.486588 
l0: 0.250361, l1: 0.252883, l2: 0.251173, l3: 0.253516, l4: 0.265237, l5: 0.392109, l6: 0.604727

[epoch:  50/100, batch:   792/  792, ite: 57480] train loss: 4.708539, tar: 0.486428 
Starting epoch 51
Epoch 51 loading complete
l0: 1.065814, l1: 1.067070, l2: 1.066633, l3: 1.065057, l4: 1.113093, l5: 1.241222, l6: 1.288094

[epoch:  51/100, batch:     2/  792,

l0: 0.633593, l1: 0.651917, l2: 0.633608, l3: 0.622004, l4: 0.632632, l5: 0.717870, l6: 0.796661

[epoch:  51/100, batch:    82/  792, ite: 57521] train loss: 4.707538, tar: 0.486180 
l0: 0.273808, l1: 0.274369, l2: 0.276071, l3: 0.282876, l4: 0.322755, l5: 0.403134, l6: 0.536159

[epoch:  51/100, batch:    84/  792, ite: 57522] train loss: 4.706427, tar: 0.486040 
l0: 0.605566, l1: 0.603786, l2: 0.603043, l3: 0.602008, l4: 0.608893, l5: 0.594875, l6: 0.788791

[epoch:  51/100, batch:    86/  792, ite: 57523] train loss: 4.706769, tar: 0.486119 
l0: 0.350810, l1: 0.348785, l2: 0.352315, l3: 0.357384, l4: 0.385391, l5: 0.584281, l6: 0.771883

[epoch:  51/100, batch:    88/  792, ite: 57524] train loss: 4.706260, tar: 0.486030 
l0: 0.368248, l1: 0.371676, l2: 0.373587, l3: 0.388962, l4: 0.393150, l5: 0.488746, l6: 0.544587

[epoch:  51/100, batch:    90/  792, ite: 57525] train loss: 4.705551, tar: 0.485953 
l0: 0.491635, l1: 0.494797, l2: 0.497488, l3: 0.507906, l4: 0.537230, l5: 0.6159

[epoch:  51/100, batch:   170/  792, ite: 57565] train loss: 4.708439, tar: 0.486239 
l0: 0.557633, l1: 0.558333, l2: 0.557951, l3: 0.556150, l4: 0.586995, l5: 0.583137, l6: 0.706833

[epoch:  51/100, batch:   172/  792, ite: 57566] train loss: 4.708581, tar: 0.486284 
l0: 0.237623, l1: 0.239382, l2: 0.242402, l3: 0.250643, l4: 0.287139, l5: 0.362637, l6: 0.518163

[epoch:  51/100, batch:   174/  792, ite: 57567] train loss: 4.707315, tar: 0.486125 
l0: 0.425204, l1: 0.423458, l2: 0.423466, l3: 0.426771, l4: 0.420477, l5: 0.490161, l6: 0.670694

[epoch:  51/100, batch:   176/  792, ite: 57568] train loss: 4.706833, tar: 0.486087 
l0: 0.727310, l1: 0.732040, l2: 0.731535, l3: 0.732832, l4: 0.723911, l5: 0.726970, l6: 0.804298

[epoch:  51/100, batch:   178/  792, ite: 57569] train loss: 4.707693, tar: 0.486240 
l0: 0.352689, l1: 0.354143, l2: 0.351759, l3: 0.354192, l4: 0.371939, l5: 0.472309, l6: 0.623059

[epoch:  51/100, batch:   180/  792, ite: 57570] train loss: 4.706940, tar: 0.48

[epoch:  51/100, batch:   260/  792, ite: 57610] train loss: 4.701700, tar: 0.485657 
l0: 0.694156, l1: 0.705015, l2: 0.700628, l3: 0.704883, l4: 0.723377, l5: 0.775735, l6: 1.002386

[epoch:  51/100, batch:   262/  792, ite: 57611] train loss: 4.702690, tar: 0.485786 
l0: 0.742770, l1: 0.744751, l2: 0.748342, l3: 0.746793, l4: 0.741600, l5: 0.810368, l6: 0.909973

[epoch:  51/100, batch:   264/  792, ite: 57612] train loss: 4.703773, tar: 0.485946 
l0: 0.451284, l1: 0.447579, l2: 0.449267, l3: 0.455718, l4: 0.474547, l5: 0.589072, l6: 0.840817

[epoch:  51/100, batch:   266/  792, ite: 57613] train loss: 4.703719, tar: 0.485924 
l0: 2.073286, l1: 2.075428, l2: 2.081351, l3: 2.127067, l4: 2.277323, l5: 2.343342, l6: 2.284320

[epoch:  51/100, batch:   268/  792, ite: 57614] train loss: 4.711581, tar: 0.486908 
l0: 0.928139, l1: 0.960344, l2: 0.951018, l3: 0.946750, l4: 1.003426, l5: 1.005202, l6: 0.899669

[epoch:  51/100, batch:   270/  792, ite: 57615] train loss: 4.713445, tar: 0.48

[epoch:  51/100, batch:   350/  792, ite: 57655] train loss: 4.715238, tar: 0.487475 
l0: 0.322338, l1: 0.316403, l2: 0.315692, l3: 0.327790, l4: 0.341691, l5: 0.445580, l6: 0.649265

[epoch:  51/100, batch:   352/  792, ite: 57656] train loss: 4.714410, tar: 0.487375 
l0: 0.325744, l1: 0.324800, l2: 0.322978, l3: 0.326872, l4: 0.329483, l5: 0.398835, l6: 0.445056

[epoch:  51/100, batch:   354/  792, ite: 57657] train loss: 4.713352, tar: 0.487277 
l0: 0.422205, l1: 0.423093, l2: 0.422611, l3: 0.425923, l4: 0.423995, l5: 0.562304, l6: 0.747166

[epoch:  51/100, batch:   356/  792, ite: 57658] train loss: 4.713039, tar: 0.487238 
l0: 0.436511, l1: 0.432888, l2: 0.435289, l3: 0.444732, l4: 0.471419, l5: 0.531040, l6: 0.661721

[epoch:  51/100, batch:   358/  792, ite: 57659] train loss: 4.712715, tar: 0.487207 
l0: 0.700083, l1: 0.706343, l2: 0.708857, l3: 0.705802, l4: 0.716977, l5: 0.778308, l6: 0.917612

[epoch:  51/100, batch:   360/  792, ite: 57660] train loss: 4.713654, tar: 0.48

[epoch:  51/100, batch:   440/  792, ite: 57700] train loss: 4.712426, tar: 0.487249 
l0: 0.315481, l1: 0.313671, l2: 0.311986, l3: 0.316844, l4: 0.351435, l5: 0.437932, l6: 0.674929

[epoch:  51/100, batch:   442/  792, ite: 57701] train loss: 4.711651, tar: 0.487148 
l0: 0.291478, l1: 0.296012, l2: 0.290603, l3: 0.294663, l4: 0.355530, l5: 0.561141, l6: 0.622954

[epoch:  51/100, batch:   444/  792, ite: 57702] train loss: 4.710873, tar: 0.487033 
l0: 0.485279, l1: 0.489700, l2: 0.490567, l3: 0.492487, l4: 0.535657, l5: 0.611505, l6: 0.676423

[epoch:  51/100, batch:   446/  792, ite: 57703] train loss: 4.710818, tar: 0.487032 
l0: 0.909013, l1: 0.902636, l2: 0.903965, l3: 0.932016, l4: 1.015667, l5: 1.125808, l6: 1.194645

[epoch:  51/100, batch:   448/  792, ite: 57704] train loss: 4.712884, tar: 0.487280 
l0: 0.426697, l1: 0.431141, l2: 0.433384, l3: 0.438103, l4: 0.474290, l5: 0.515373, l6: 0.778092

[epoch:  51/100, batch:   450/  792, ite: 57705] train loss: 4.712689, tar: 0.48

[epoch:  51/100, batch:   530/  792, ite: 57745] train loss: 4.705808, tar: 0.486390 
l0: 0.400993, l1: 0.401041, l2: 0.403297, l3: 0.408287, l4: 0.426438, l5: 0.505007, l6: 0.589795

[epoch:  51/100, batch:   532/  792, ite: 57746] train loss: 4.705269, tar: 0.486341 
l0: 0.329199, l1: 0.328680, l2: 0.327500, l3: 0.326442, l4: 0.335538, l5: 0.424782, l6: 0.554527

[epoch:  51/100, batch:   534/  792, ite: 57747] train loss: 4.704413, tar: 0.486251 
l0: 0.515649, l1: 0.505575, l2: 0.509173, l3: 0.510737, l4: 0.537436, l5: 0.611025, l6: 1.193432

[epoch:  51/100, batch:   536/  792, ite: 57748] train loss: 4.704970, tar: 0.486268 
l0: 0.520508, l1: 0.521435, l2: 0.520657, l3: 0.509203, l4: 0.516959, l5: 0.589640, l6: 0.865796

[epoch:  51/100, batch:   538/  792, ite: 57749] train loss: 4.705142, tar: 0.486288 
l0: 0.460144, l1: 0.462305, l2: 0.463819, l3: 0.464275, l4: 0.474831, l5: 0.558169, l6: 0.701290

[epoch:  51/100, batch:   540/  792, ite: 57750] train loss: 4.704918, tar: 0.48

[epoch:  51/100, batch:   620/  792, ite: 57790] train loss: 4.698138, tar: 0.485460 
l0: 0.345409, l1: 0.353131, l2: 0.352433, l3: 0.359155, l4: 0.396720, l5: 0.489965, l6: 0.748840

[epoch:  51/100, batch:   622/  792, ite: 57791] train loss: 4.697691, tar: 0.485382 
l0: 0.407064, l1: 0.410828, l2: 0.408203, l3: 0.410223, l4: 0.446887, l5: 0.567382, l6: 0.791387

[epoch:  51/100, batch:   624/  792, ite: 57792] train loss: 4.697464, tar: 0.485338 
l0: 0.384218, l1: 0.386020, l2: 0.388305, l3: 0.404712, l4: 0.436216, l5: 0.561594, l6: 0.764527

[epoch:  51/100, batch:   626/  792, ite: 57793] train loss: 4.697131, tar: 0.485282 
l0: 0.566338, l1: 0.568154, l2: 0.566888, l3: 0.561456, l4: 0.567615, l5: 0.620525, l6: 0.719807

[epoch:  51/100, batch:   628/  792, ite: 57794] train loss: 4.697242, tar: 0.485327 
l0: 0.510106, l1: 0.515480, l2: 0.514988, l3: 0.509962, l4: 0.533476, l5: 0.554071, l6: 0.907448

[epoch:  51/100, batch:   630/  792, ite: 57795] train loss: 4.697331, tar: 0.48

[epoch:  51/100, batch:   710/  792, ite: 57835] train loss: 4.698133, tar: 0.485290 
l0: 0.471274, l1: 0.470678, l2: 0.468352, l3: 0.457233, l4: 0.472352, l5: 0.604554, l6: 0.801272

[epoch:  51/100, batch:   712/  792, ite: 57836] train loss: 4.698131, tar: 0.485282 
l0: 0.715681, l1: 0.720320, l2: 0.717934, l3: 0.722170, l4: 0.756671, l5: 0.821430, l6: 1.007388

[epoch:  51/100, batch:   714/  792, ite: 57837] train loss: 4.699084, tar: 0.485407 
l0: 0.590634, l1: 0.592090, l2: 0.584807, l3: 0.580265, l4: 0.600102, l5: 0.743829, l6: 0.870655

[epoch:  51/100, batch:   716/  792, ite: 57838] train loss: 4.699525, tar: 0.485465 
l0: 0.932143, l1: 0.947852, l2: 0.947463, l3: 0.951479, l4: 0.982780, l5: 1.007866, l6: 1.000997

[epoch:  51/100, batch:   718/  792, ite: 57839] train loss: 4.701194, tar: 0.485708 
l0: 1.627263, l1: 1.628392, l2: 1.635126, l3: 1.659783, l4: 1.719454, l5: 1.847594, l6: 1.822737

[epoch:  51/100, batch:   720/  792, ite: 57840] train loss: 4.706083, tar: 0.48

l0: 0.374338, l1: 0.376440, l2: 0.377297, l3: 0.376356, l4: 0.402044, l5: 0.456619, l6: 0.670396

[epoch:  52/100, batch:     8/  792, ite: 57880] train loss: 4.704307, tar: 0.486071 
l0: 0.606211, l1: 0.591480, l2: 0.597883, l3: 0.601184, l4: 0.615276, l5: 0.618023, l6: 1.272333

[epoch:  52/100, batch:    10/  792, ite: 57881] train loss: 4.705140, tar: 0.486135 
l0: 0.300244, l1: 0.300796, l2: 0.297450, l3: 0.292106, l4: 0.332505, l5: 0.433934, l6: 0.684833

[epoch:  52/100, batch:    12/  792, ite: 57882] train loss: 4.704441, tar: 0.486036 
l0: 0.549549, l1: 0.557807, l2: 0.555197, l3: 0.557422, l4: 0.562780, l5: 0.694737, l6: 0.878995

[epoch:  52/100, batch:    14/  792, ite: 57883] train loss: 4.704759, tar: 0.486070 
l0: 0.508458, l1: 0.518191, l2: 0.518421, l3: 0.530696, l4: 0.557788, l5: 0.648084, l6: 1.042578

[epoch:  52/100, batch:    16/  792, ite: 57884] train loss: 4.705141, tar: 0.486082 
l0: 0.462140, l1: 0.462411, l2: 0.465286, l3: 0.464026, l4: 0.500025, l5: 0.5716

[epoch:  52/100, batch:    96/  792, ite: 57924] train loss: 4.703431, tar: 0.485800 
l0: 0.373514, l1: 0.379621, l2: 0.382402, l3: 0.394126, l4: 0.413228, l5: 0.620506, l6: 0.959648

[epoch:  52/100, batch:    98/  792, ite: 57925] train loss: 4.703269, tar: 0.485741 
l0: 0.438006, l1: 0.445412, l2: 0.445383, l3: 0.453849, l4: 0.486489, l5: 0.547734, l6: 0.766560

[epoch:  52/100, batch:   100/  792, ite: 57926] train loss: 4.703145, tar: 0.485716 
l0: 0.573940, l1: 0.575074, l2: 0.571023, l3: 0.573834, l4: 0.592874, l5: 0.693601, l6: 0.879249

[epoch:  52/100, batch:   102/  792, ite: 57927] train loss: 4.703560, tar: 0.485762 
l0: 0.775065, l1: 0.789278, l2: 0.793094, l3: 0.789015, l4: 0.800360, l5: 0.883909, l6: 0.957717

[epoch:  52/100, batch:   104/  792, ite: 57928] train loss: 4.704718, tar: 0.485912 
l0: 0.538721, l1: 0.543823, l2: 0.543012, l3: 0.558241, l4: 0.574150, l5: 0.644322, l6: 0.878018

[epoch:  52/100, batch:   106/  792, ite: 57929] train loss: 4.705023, tar: 0.48

[epoch:  52/100, batch:   186/  792, ite: 57969] train loss: 4.695390, tar: 0.484730 
l0: 0.311656, l1: 0.315537, l2: 0.309763, l3: 0.303498, l4: 0.328229, l5: 0.410129, l6: 0.448527

[epoch:  52/100, batch:   188/  792, ite: 57970] train loss: 4.694505, tar: 0.484643 
l0: 0.658480, l1: 0.666543, l2: 0.664606, l3: 0.661664, l4: 0.711802, l5: 0.846494, l6: 1.001251

[epoch:  52/100, batch:   190/  792, ite: 57971] train loss: 4.695312, tar: 0.484731 
l0: 0.589893, l1: 0.591893, l2: 0.591759, l3: 0.587646, l4: 0.617886, l5: 0.714881, l6: 0.876861

[epoch:  52/100, batch:   192/  792, ite: 57972] train loss: 4.695736, tar: 0.484784 
l0: 0.358808, l1: 0.362638, l2: 0.365229, l3: 0.366447, l4: 0.365519, l5: 0.416194, l6: 0.540626

[epoch:  52/100, batch:   194/  792, ite: 57973] train loss: 4.695105, tar: 0.484720 
l0: 0.353084, l1: 0.351313, l2: 0.353494, l3: 0.348759, l4: 0.367200, l5: 0.455585, l6: 0.625198

[epoch:  52/100, batch:   196/  792, ite: 57974] train loss: 4.694488, tar: 0.48

[epoch:  52/100, batch:   276/  792, ite: 58014] train loss: 3.943843, tar: 0.386398 
l0: 0.456505, l1: 0.460373, l2: 0.460466, l3: 0.470236, l4: 0.487168, l5: 0.615059, l6: 0.782619

[epoch:  52/100, batch:   278/  792, ite: 58015] train loss: 3.994530, tar: 0.391072 
l0: 0.423283, l1: 0.427882, l2: 0.428492, l3: 0.445938, l4: 0.474360, l5: 0.599079, l6: 0.736379

[epoch:  52/100, batch:   280/  792, ite: 58016] train loss: 4.017995, tar: 0.393085 
l0: 0.223244, l1: 0.223513, l2: 0.220417, l3: 0.217657, l4: 0.265766, l5: 0.404080, l6: 0.569747

[epoch:  52/100, batch:   282/  792, ite: 58017] train loss: 3.941818, tar: 0.383094 
l0: 0.519260, l1: 0.527306, l2: 0.526908, l3: 0.523122, l4: 0.538731, l5: 0.546276, l6: 0.618885

[epoch:  52/100, batch:   284/  792, ite: 58018] train loss: 3.970348, tar: 0.390659 
l0: 0.397857, l1: 0.398172, l2: 0.400581, l3: 0.401538, l4: 0.448556, l5: 0.604440, l6: 0.827940

[epoch:  52/100, batch:   286/  792, ite: 58019] train loss: 3.991987, tar: 0.39

[epoch:  52/100, batch:   366/  792, ite: 58059] train loss: 4.788611, tar: 0.497347 
l0: 0.478259, l1: 0.479616, l2: 0.478336, l3: 0.488334, l4: 0.490000, l5: 0.562195, l6: 0.623041

[epoch:  52/100, batch:   368/  792, ite: 58060] train loss: 4.779599, tar: 0.497029 
l0: 0.284308, l1: 0.293533, l2: 0.288581, l3: 0.302628, l4: 0.341105, l5: 0.473752, l6: 0.603079

[epoch:  52/100, batch:   370/  792, ite: 58061] train loss: 4.754349, tar: 0.493542 
l0: 0.387336, l1: 0.393861, l2: 0.391106, l3: 0.399888, l4: 0.431122, l5: 0.600340, l6: 0.787731

[epoch:  52/100, batch:   372/  792, ite: 58062] train loss: 4.747308, tar: 0.491829 
l0: 0.416950, l1: 0.416107, l2: 0.413895, l3: 0.408727, l4: 0.448903, l5: 0.518693, l6: 0.701802

[epoch:  52/100, batch:   374/  792, ite: 58063] train loss: 4.736936, tar: 0.490640 
l0: 0.371575, l1: 0.376635, l2: 0.370970, l3: 0.364851, l4: 0.381234, l5: 0.477690, l6: 0.608858

[epoch:  52/100, batch:   376/  792, ite: 58064] train loss: 4.718760, tar: 0.48

[epoch:  52/100, batch:   456/  792, ite: 58104] train loss: 4.611842, tar: 0.476764 
l0: 0.494764, l1: 0.495569, l2: 0.497275, l3: 0.512502, l4: 0.554117, l5: 0.647190, l6: 0.690523

[epoch:  52/100, batch:   458/  792, ite: 58105] train loss: 4.612376, tar: 0.476935 
l0: 0.438237, l1: 0.443889, l2: 0.442710, l3: 0.455163, l4: 0.484380, l5: 0.571249, l6: 0.667105

[epoch:  52/100, batch:   460/  792, ite: 58106] train loss: 4.608160, tar: 0.476570 
l0: 0.507191, l1: 0.500996, l2: 0.497451, l3: 0.492303, l4: 0.490084, l5: 0.527826, l6: 0.682737

[epoch:  52/100, batch:   462/  792, ite: 58107] train loss: 4.606485, tar: 0.476856 
l0: 0.438162, l1: 0.436852, l2: 0.437647, l3: 0.452924, l4: 0.498614, l5: 0.542738, l6: 0.771326

[epoch:  52/100, batch:   464/  792, ite: 58108] train loss: 4.604075, tar: 0.476498 
l0: 0.356055, l1: 0.356774, l2: 0.358558, l3: 0.362786, l4: 0.352030, l5: 0.420390, l6: 0.585688

[epoch:  52/100, batch:   466/  792, ite: 58109] train loss: 4.593516, tar: 0.47

[epoch:  52/100, batch:   546/  792, ite: 58149] train loss: 4.784577, tar: 0.499243 
l0: 0.393614, l1: 0.399637, l2: 0.398082, l3: 0.390443, l4: 0.431961, l5: 0.586051, l6: 0.775101

[epoch:  52/100, batch:   548/  792, ite: 58150] train loss: 4.780386, tar: 0.498539 
l0: 0.483428, l1: 0.483231, l2: 0.485879, l3: 0.493951, l4: 0.519608, l5: 0.584885, l6: 0.713128

[epoch:  52/100, batch:   550/  792, ite: 58151] train loss: 4.778963, tar: 0.498439 
l0: 0.628306, l1: 0.636139, l2: 0.638031, l3: 0.644281, l4: 0.684077, l5: 0.789680, l6: 0.884834

[epoch:  52/100, batch:   552/  792, ite: 58152] train loss: 4.785982, tar: 0.499293 
l0: 0.501169, l1: 0.499323, l2: 0.500388, l3: 0.509156, l4: 0.560405, l5: 0.694295, l6: 0.929862

[epoch:  52/100, batch:   554/  792, ite: 58153] train loss: 4.788862, tar: 0.499306 
l0: 0.627262, l1: 0.625922, l2: 0.629989, l3: 0.630425, l4: 0.632495, l5: 0.682081, l6: 0.937016

[epoch:  52/100, batch:   556/  792, ite: 58154] train loss: 4.795044, tar: 0.50

[epoch:  52/100, batch:   636/  792, ite: 58194] train loss: 4.717990, tar: 0.492127 
l0: 0.361084, l1: 0.366036, l2: 0.364965, l3: 0.368028, l4: 0.385294, l5: 0.481272, l6: 0.632099

[epoch:  52/100, batch:   638/  792, ite: 58195] train loss: 4.712489, tar: 0.491455 
l0: 0.743470, l1: 0.751612, l2: 0.753381, l3: 0.759117, l4: 0.768780, l5: 0.816333, l6: 1.081406

[epoch:  52/100, batch:   640/  792, ite: 58196] train loss: 4.723189, tar: 0.492740 
l0: 0.393641, l1: 0.399783, l2: 0.403777, l3: 0.409174, l4: 0.451780, l5: 0.545461, l6: 0.815058

[epoch:  52/100, batch:   642/  792, ite: 58197] train loss: 4.721644, tar: 0.492237 
l0: 0.807395, l1: 0.825710, l2: 0.828149, l3: 0.852659, l4: 0.868317, l5: 0.970610, l6: 1.035968

[epoch:  52/100, batch:   644/  792, ite: 58198] train loss: 4.734467, tar: 0.493829 
l0: 0.872358, l1: 0.893354, l2: 0.869622, l3: 0.851847, l4: 0.893987, l5: 0.983819, l6: 1.070058

[epoch:  52/100, batch:   646/  792, ite: 58199] train loss: 4.748142, tar: 0.49

[epoch:  52/100, batch:   726/  792, ite: 58239] train loss: 4.765361, tar: 0.496461 
l0: 0.341561, l1: 0.344441, l2: 0.348235, l3: 0.347144, l4: 0.380221, l5: 0.435232, l6: 0.683899

[epoch:  52/100, batch:   728/  792, ite: 58240] train loss: 4.760486, tar: 0.495816 
l0: 0.428298, l1: 0.428628, l2: 0.428354, l3: 0.425358, l4: 0.432220, l5: 0.522261, l6: 0.698359

[epoch:  52/100, batch:   730/  792, ite: 58241] train loss: 4.757753, tar: 0.495536 
l0: 0.700228, l1: 0.697413, l2: 0.690333, l3: 0.701159, l4: 0.758632, l5: 0.920269, l6: 1.173553

[epoch:  52/100, batch:   732/  792, ite: 58242] train loss: 4.766697, tar: 0.496381 
l0: 0.312145, l1: 0.315498, l2: 0.316551, l3: 0.320275, l4: 0.333568, l5: 0.406473, l6: 0.535975

[epoch:  52/100, batch:   734/  792, ite: 58243] train loss: 4.760471, tar: 0.495623 
l0: 0.677796, l1: 0.680829, l2: 0.681433, l3: 0.679795, l4: 0.711651, l5: 0.717634, l6: 0.865443

[epoch:  52/100, batch:   736/  792, ite: 58244] train loss: 4.765457, tar: 0.49

l0: 0.277388, l1: 0.283882, l2: 0.288245, l3: 0.304532, l4: 0.362086, l5: 0.548358, l6: 0.639012

[epoch:  53/100, batch:    24/  792, ite: 58284] train loss: 4.739690, tar: 0.492387 
l0: 0.323421, l1: 0.332593, l2: 0.332716, l3: 0.335933, l4: 0.340223, l5: 0.453553, l6: 0.759152

[epoch:  53/100, batch:    26/  792, ite: 58285] train loss: 4.735628, tar: 0.491795 
l0: 0.721776, l1: 0.735885, l2: 0.734980, l3: 0.735265, l4: 0.746610, l5: 0.788328, l6: 0.893237

[epoch:  53/100, batch:    28/  792, ite: 58286] train loss: 4.741239, tar: 0.492599 
l0: 0.548993, l1: 0.543717, l2: 0.540857, l3: 0.550519, l4: 0.574270, l5: 0.616924, l6: 0.943579

[epoch:  53/100, batch:    30/  792, ite: 58287] train loss: 4.743012, tar: 0.492795 
l0: 0.349096, l1: 0.344859, l2: 0.345487, l3: 0.355702, l4: 0.414363, l5: 0.559463, l6: 0.793891

[epoch:  53/100, batch:    32/  792, ite: 58288] train loss: 4.740406, tar: 0.492296 
l0: 0.557173, l1: 0.557650, l2: 0.552712, l3: 0.562025, l4: 0.576237, l5: 0.7281

[epoch:  53/100, batch:   112/  792, ite: 58328] train loss: 4.746742, tar: 0.492556 
l0: 0.353896, l1: 0.354675, l2: 0.354425, l3: 0.358113, l4: 0.371386, l5: 0.412171, l6: 0.526929

[epoch:  53/100, batch:   114/  792, ite: 58329] train loss: 4.742665, tar: 0.492134 
l0: 0.647844, l1: 0.659700, l2: 0.652660, l3: 0.644048, l4: 0.645698, l5: 0.691835, l6: 0.758932

[epoch:  53/100, batch:   116/  792, ite: 58330] train loss: 4.744810, tar: 0.492606 
l0: 0.729872, l1: 0.736155, l2: 0.737219, l3: 0.726278, l4: 0.735519, l5: 0.809036, l6: 0.920676

[epoch:  53/100, batch:   118/  792, ite: 58331] train loss: 4.749672, tar: 0.493323 
l0: 0.455869, l1: 0.461060, l2: 0.463044, l3: 0.470751, l4: 0.492592, l5: 0.603792, l6: 0.721292

[epoch:  53/100, batch:   120/  792, ite: 58332] train loss: 4.748631, tar: 0.493210 
l0: 0.673605, l1: 0.674458, l2: 0.678497, l3: 0.676451, l4: 0.687422, l5: 0.733683, l6: 0.951686

[epoch:  53/100, batch:   122/  792, ite: 58333] train loss: 4.752595, tar: 0.49

[epoch:  53/100, batch:   202/  792, ite: 58373] train loss: 4.782259, tar: 0.497248 
l0: 0.526199, l1: 0.524594, l2: 0.524193, l3: 0.531097, l4: 0.540159, l5: 0.590606, l6: 0.815694

[epoch:  53/100, batch:   204/  792, ite: 58374] train loss: 4.782773, tar: 0.497326 
l0: 0.334525, l1: 0.344318, l2: 0.341915, l3: 0.344489, l4: 0.407256, l5: 0.652872, l6: 0.983552

[epoch:  53/100, batch:   206/  792, ite: 58375] train loss: 4.781462, tar: 0.496892 
l0: 0.377147, l1: 0.382051, l2: 0.379939, l3: 0.382376, l4: 0.421112, l5: 0.531037, l6: 0.641861

[epoch:  53/100, batch:   208/  792, ite: 58376] train loss: 4.778884, tar: 0.496573 
l0: 0.448555, l1: 0.449303, l2: 0.447931, l3: 0.450403, l4: 0.487936, l5: 0.659315, l6: 0.832763

[epoch:  53/100, batch:   210/  792, ite: 58377] train loss: 4.778741, tar: 0.496446 
l0: 0.497819, l1: 0.499267, l2: 0.504612, l3: 0.518327, l4: 0.578227, l5: 0.691800, l6: 0.898351

[epoch:  53/100, batch:   212/  792, ite: 58378] train loss: 4.779894, tar: 0.49

[epoch:  53/100, batch:   292/  792, ite: 58418] train loss: 4.786086, tar: 0.496600 
l0: 0.277011, l1: 0.279132, l2: 0.284665, l3: 0.287080, l4: 0.322461, l5: 0.433615, l6: 0.649773

[epoch:  53/100, batch:   294/  792, ite: 58419] train loss: 4.782679, tar: 0.496075 
l0: 0.371975, l1: 0.375635, l2: 0.376308, l3: 0.375804, l4: 0.394246, l5: 0.474355, l6: 0.657207

[epoch:  53/100, batch:   296/  792, ite: 58420] train loss: 4.780188, tar: 0.495780 
l0: 0.317745, l1: 0.319418, l2: 0.323091, l3: 0.331874, l4: 0.358625, l5: 0.476020, l6: 0.589774

[epoch:  53/100, batch:   298/  792, ite: 58421] train loss: 4.777156, tar: 0.495357 
l0: 0.415066, l1: 0.418539, l2: 0.420033, l3: 0.424954, l4: 0.444961, l5: 0.528780, l6: 0.554815

[epoch:  53/100, batch:   300/  792, ite: 58422] train loss: 4.774864, tar: 0.495167 
l0: 0.334343, l1: 0.337798, l2: 0.339258, l3: 0.340214, l4: 0.374921, l5: 0.464262, l6: 0.583964

[epoch:  53/100, batch:   302/  792, ite: 58423] train loss: 4.771634, tar: 0.49

[epoch:  53/100, batch:   382/  792, ite: 58463] train loss: 4.760504, tar: 0.493171 
l0: 0.216627, l1: 0.222200, l2: 0.223022, l3: 0.236186, l4: 0.270987, l5: 0.404567, l6: 0.502585

[epoch:  53/100, batch:   384/  792, ite: 58464] train loss: 4.755891, tar: 0.492575 
l0: 0.521033, l1: 0.529807, l2: 0.527757, l3: 0.523499, l4: 0.542998, l5: 0.595402, l6: 0.721918

[epoch:  53/100, batch:   386/  792, ite: 58465] train loss: 4.755785, tar: 0.492636 
l0: 0.276531, l1: 0.279640, l2: 0.277555, l3: 0.286468, l4: 0.320254, l5: 0.430482, l6: 0.540411

[epoch:  53/100, batch:   388/  792, ite: 58466] train loss: 4.752003, tar: 0.492172 
l0: 0.433979, l1: 0.434190, l2: 0.428532, l3: 0.426444, l4: 0.424014, l5: 0.501975, l6: 0.608763

[epoch:  53/100, batch:   390/  792, ite: 58467] train loss: 4.750210, tar: 0.492047 
l0: 0.559545, l1: 0.556330, l2: 0.551205, l3: 0.546295, l4: 0.545620, l5: 0.629693, l6: 0.825327

[epoch:  53/100, batch:   392/  792, ite: 58468] train loss: 4.750954, tar: 0.49

[epoch:  53/100, batch:   472/  792, ite: 58508] train loss: 4.726325, tar: 0.489285 
l0: 0.395452, l1: 0.395842, l2: 0.390306, l3: 0.387812, l4: 0.423827, l5: 0.500125, l6: 0.676921

[epoch:  53/100, batch:   474/  792, ite: 58509] train loss: 4.724615, tar: 0.489100 
l0: 0.421194, l1: 0.418817, l2: 0.420374, l3: 0.428287, l4: 0.477655, l5: 0.562424, l6: 0.766559

[epoch:  53/100, batch:   476/  792, ite: 58510] train loss: 4.723909, tar: 0.488967 
l0: 0.577567, l1: 0.584010, l2: 0.584416, l3: 0.587100, l4: 0.602192, l5: 0.690058, l6: 0.784382

[epoch:  53/100, batch:   478/  792, ite: 58511] train loss: 4.724944, tar: 0.489140 
l0: 0.452400, l1: 0.452045, l2: 0.456770, l3: 0.456090, l4: 0.482329, l5: 0.602844, l6: 0.809332

[epoch:  53/100, batch:   480/  792, ite: 58512] train loss: 4.724688, tar: 0.489069 
l0: 0.427760, l1: 0.428575, l2: 0.425943, l3: 0.437278, l4: 0.439738, l5: 0.528006, l6: 0.657742

[epoch:  53/100, batch:   482/  792, ite: 58513] train loss: 4.723294, tar: 0.48

[epoch:  53/100, batch:   562/  792, ite: 58553] train loss: 4.711644, tar: 0.487569 
l0: 0.292034, l1: 0.291116, l2: 0.290545, l3: 0.306873, l4: 0.330428, l5: 0.414647, l6: 0.469313

[epoch:  53/100, batch:   564/  792, ite: 58554] train loss: 4.708383, tar: 0.487216 
l0: 0.416022, l1: 0.416180, l2: 0.417952, l3: 0.421481, l4: 0.437258, l5: 0.556492, l6: 0.692335

[epoch:  53/100, batch:   566/  792, ite: 58555] train loss: 4.707447, tar: 0.487088 
l0: 0.976153, l1: 1.002020, l2: 0.996252, l3: 0.997857, l4: 1.028123, l5: 0.983217, l6: 1.130630

[epoch:  53/100, batch:   568/  792, ite: 58556] train loss: 4.713977, tar: 0.487968 
l0: 0.450020, l1: 0.450299, l2: 0.448235, l3: 0.436874, l4: 0.431813, l5: 0.545147, l6: 0.630437

[epoch:  53/100, batch:   570/  792, ite: 58557] train loss: 4.712773, tar: 0.487900 
l0: 0.748830, l1: 0.757343, l2: 0.759258, l3: 0.761243, l4: 0.791806, l5: 0.822411, l6: 1.004366

[epoch:  53/100, batch:   572/  792, ite: 58558] train loss: 4.716127, tar: 0.48

[epoch:  53/100, batch:   652/  792, ite: 58598] train loss: 4.716982, tar: 0.488464 
l0: 0.479158, l1: 0.481432, l2: 0.484115, l3: 0.484063, l4: 0.477542, l5: 0.610555, l6: 0.678953

[epoch:  53/100, batch:   654/  792, ite: 58599] train loss: 4.716667, tar: 0.488448 
l0: 0.736634, l1: 0.734033, l2: 0.729104, l3: 0.725394, l4: 0.752004, l5: 0.881011, l6: 0.973247

[epoch:  53/100, batch:   656/  792, ite: 58600] train loss: 4.719689, tar: 0.488862 
l0: 0.726975, l1: 0.729933, l2: 0.729286, l3: 0.736382, l4: 0.766799, l5: 0.838704, l6: 0.949505

[epoch:  53/100, batch:   658/  792, ite: 58601] train loss: 4.722456, tar: 0.489258 
l0: 0.544772, l1: 0.550782, l2: 0.553404, l3: 0.557943, l4: 0.586986, l5: 0.660975, l6: 0.908868

[epoch:  53/100, batch:   660/  792, ite: 58602] train loss: 4.723416, tar: 0.489350 
l0: 0.577162, l1: 0.580024, l2: 0.580423, l3: 0.582337, l4: 0.603313, l5: 0.706447, l6: 0.955244

[epoch:  53/100, batch:   662/  792, ite: 58603] train loss: 4.724912, tar: 0.48

[epoch:  53/100, batch:   742/  792, ite: 58643] train loss: 4.705949, tar: 0.487436 
l0: 0.375943, l1: 0.375388, l2: 0.377104, l3: 0.381483, l4: 0.391695, l5: 0.506527, l6: 0.610248

[epoch:  53/100, batch:   744/  792, ite: 58644] train loss: 4.704280, tar: 0.487263 
l0: 0.342771, l1: 0.348587, l2: 0.346463, l3: 0.343702, l4: 0.361220, l5: 0.449226, l6: 0.610395

[epoch:  53/100, batch:   746/  792, ite: 58645] train loss: 4.702380, tar: 0.487039 
l0: 0.494830, l1: 0.494912, l2: 0.491016, l3: 0.505557, l4: 0.560833, l5: 0.633211, l6: 0.848526

[epoch:  53/100, batch:   748/  792, ite: 58646] train loss: 4.702876, tar: 0.487051 
l0: 0.440336, l1: 0.446453, l2: 0.446404, l3: 0.447799, l4: 0.499852, l5: 0.587955, l6: 0.800177

[epoch:  53/100, batch:   750/  792, ite: 58647] train loss: 4.702488, tar: 0.486979 
l0: 0.318803, l1: 0.324075, l2: 0.322943, l3: 0.322884, l4: 0.355593, l5: 0.487232, l6: 0.652321

[epoch:  53/100, batch:   752/  792, ite: 58648] train loss: 4.700520, tar: 0.48

l0: 0.392683, l1: 0.389665, l2: 0.384212, l3: 0.376509, l4: 0.383027, l5: 0.499797, l6: 0.628711

[epoch:  54/100, batch:    40/  792, ite: 58688] train loss: 4.675578, tar: 0.483006 
l0: 0.580939, l1: 0.588139, l2: 0.590269, l3: 0.604111, l4: 0.671909, l5: 0.868416, l6: 1.260660

[epoch:  54/100, batch:    42/  792, ite: 58689] train loss: 4.678493, tar: 0.483148 
l0: 0.322347, l1: 0.325580, l2: 0.326019, l3: 0.325284, l4: 0.336459, l5: 0.474020, l6: 0.577339

[epoch:  54/100, batch:    44/  792, ite: 58690] train loss: 4.676645, tar: 0.482915 
l0: 0.495976, l1: 0.495503, l2: 0.495080, l3: 0.505084, l4: 0.530418, l5: 0.613543, l6: 0.802737

[epoch:  54/100, batch:    46/  792, ite: 58691] train loss: 4.676908, tar: 0.482934 
l0: 0.598567, l1: 0.597699, l2: 0.600631, l3: 0.601830, l4: 0.599363, l5: 0.609058, l6: 0.654196

[epoch:  54/100, batch:    48/  792, ite: 58692] train loss: 4.677221, tar: 0.483101 
l0: 0.452374, l1: 0.459661, l2: 0.459254, l3: 0.458561, l4: 0.487102, l5: 0.6615

[epoch:  54/100, batch:   128/  792, ite: 58732] train loss: 4.672722, tar: 0.482616 
l0: 0.426167, l1: 0.427517, l2: 0.431934, l3: 0.446471, l4: 0.486286, l5: 0.523505, l6: 0.693592

[epoch:  54/100, batch:   130/  792, ite: 58733] train loss: 4.672088, tar: 0.482539 
l0: 0.363326, l1: 0.367012, l2: 0.365746, l3: 0.369317, l4: 0.395311, l5: 0.462040, l6: 0.608890

[epoch:  54/100, batch:   132/  792, ite: 58734] train loss: 4.670668, tar: 0.482377 
l0: 0.333767, l1: 0.334568, l2: 0.337360, l3: 0.346019, l4: 0.386944, l5: 0.411370, l6: 0.615878

[epoch:  54/100, batch:   134/  792, ite: 58735] train loss: 4.668992, tar: 0.482175 
l0: 0.433736, l1: 0.430895, l2: 0.435248, l3: 0.437796, l4: 0.423299, l5: 0.470151, l6: 0.663794

[epoch:  54/100, batch:   136/  792, ite: 58736] train loss: 4.668067, tar: 0.482109 
l0: 0.521249, l1: 0.522476, l2: 0.523340, l3: 0.524631, l4: 0.525724, l5: 0.700410, l6: 0.932828

[epoch:  54/100, batch:   138/  792, ite: 58737] train loss: 4.668946, tar: 0.48

[epoch:  54/100, batch:   218/  792, ite: 58777] train loss: 4.667393, tar: 0.481846 
l0: 0.473244, l1: 0.477390, l2: 0.478866, l3: 0.480824, l4: 0.502310, l5: 0.592054, l6: 0.853108

[epoch:  54/100, batch:   220/  792, ite: 58778] train loss: 4.667535, tar: 0.481834 
l0: 0.461315, l1: 0.459366, l2: 0.459475, l3: 0.462150, l4: 0.477607, l5: 0.543528, l6: 0.727287

[epoch:  54/100, batch:   222/  792, ite: 58779] train loss: 4.667147, tar: 0.481808 
l0: 0.335228, l1: 0.338674, l2: 0.336053, l3: 0.340694, l4: 0.365701, l5: 0.540580, l6: 0.751957

[epoch:  54/100, batch:   224/  792, ite: 58780] train loss: 4.666014, tar: 0.481620 
l0: 0.491303, l1: 0.492170, l2: 0.491780, l3: 0.498043, l4: 0.507447, l5: 0.551921, l6: 0.854056

[epoch:  54/100, batch:   226/  792, ite: 58781] train loss: 4.666374, tar: 0.481633 
l0: 0.410818, l1: 0.401498, l2: 0.398525, l3: 0.409828, l4: 0.459725, l5: 0.653045, l6: 0.761481

[epoch:  54/100, batch:   228/  792, ite: 58782] train loss: 4.665941, tar: 0.48

[epoch:  54/100, batch:   308/  792, ite: 58822] train loss: 4.672093, tar: 0.482646 
l0: 0.338814, l1: 0.340220, l2: 0.341692, l3: 0.358053, l4: 0.392977, l5: 0.532385, l6: 0.745439

[epoch:  54/100, batch:   310/  792, ite: 58823] train loss: 4.671043, tar: 0.482471 
l0: 0.569510, l1: 0.563004, l2: 0.565605, l3: 0.570283, l4: 0.574158, l5: 0.660650, l6: 0.817707

[epoch:  54/100, batch:   312/  792, ite: 58824] train loss: 4.671754, tar: 0.482576 
l0: 0.491351, l1: 0.495164, l2: 0.494513, l3: 0.496631, l4: 0.499452, l5: 0.576506, l6: 0.781965

[epoch:  54/100, batch:   314/  792, ite: 58825] train loss: 4.671716, tar: 0.482587 
l0: 0.292871, l1: 0.298047, l2: 0.299650, l3: 0.305004, l4: 0.323170, l5: 0.474419, l6: 0.635537

[epoch:  54/100, batch:   316/  792, ite: 58826] train loss: 4.669949, tar: 0.482357 
l0: 0.477055, l1: 0.485888, l2: 0.485553, l3: 0.492760, l4: 0.511962, l5: 0.588883, l6: 0.728162

[epoch:  54/100, batch:   318/  792, ite: 58827] train loss: 4.669994, tar: 0.48

[epoch:  54/100, batch:   398/  792, ite: 58867] train loss: 4.649276, tar: 0.479735 
l0: 0.622355, l1: 0.621737, l2: 0.622827, l3: 0.624308, l4: 0.650593, l5: 0.774647, l6: 0.946068

[epoch:  54/100, batch:   400/  792, ite: 58868] train loss: 4.650632, tar: 0.479900 
l0: 0.367701, l1: 0.362370, l2: 0.362692, l3: 0.371878, l4: 0.374858, l5: 0.420664, l6: 0.733243

[epoch:  54/100, batch:   402/  792, ite: 58869] train loss: 4.649665, tar: 0.479771 
l0: 0.258051, l1: 0.258574, l2: 0.261107, l3: 0.271259, l4: 0.304747, l5: 0.397054, l6: 0.555321

[epoch:  54/100, batch:   404/  792, ite: 58870] train loss: 4.647611, tar: 0.479516 
l0: 0.312487, l1: 0.316110, l2: 0.314606, l3: 0.311240, l4: 0.352849, l5: 0.485014, l6: 0.768028

[epoch:  54/100, batch:   406/  792, ite: 58871] train loss: 4.646461, tar: 0.479324 
l0: 0.652652, l1: 0.656618, l2: 0.655564, l3: 0.670613, l4: 0.673688, l5: 0.740248, l6: 0.945653

[epoch:  54/100, batch:   408/  792, ite: 58872] train loss: 4.648152, tar: 0.47

[epoch:  54/100, batch:   488/  792, ite: 58912] train loss: 4.637008, tar: 0.478204 
l0: 0.454233, l1: 0.456663, l2: 0.455613, l3: 0.458616, l4: 0.477490, l5: 0.562765, l6: 0.694459

[epoch:  54/100, batch:   490/  792, ite: 58913] train loss: 4.636728, tar: 0.478177 
l0: 0.368724, l1: 0.366919, l2: 0.362176, l3: 0.363683, l4: 0.403486, l5: 0.488931, l6: 0.649149

[epoch:  54/100, batch:   492/  792, ite: 58914] train loss: 4.635721, tar: 0.478058 
l0: 0.529385, l1: 0.535283, l2: 0.535624, l3: 0.538384, l4: 0.549863, l5: 0.561162, l6: 0.625587

[epoch:  54/100, batch:   494/  792, ite: 58915] train loss: 4.635687, tar: 0.478114 
l0: 0.562646, l1: 0.569495, l2: 0.565606, l3: 0.567877, l4: 0.571510, l5: 0.610444, l6: 0.621770

[epoch:  54/100, batch:   496/  792, ite: 58916] train loss: 4.635897, tar: 0.478206 
l0: 0.381433, l1: 0.388550, l2: 0.391467, l3: 0.396701, l4: 0.442933, l5: 0.534601, l6: 0.794561

[epoch:  54/100, batch:   498/  792, ite: 58917] train loss: 4.635365, tar: 0.47

[epoch:  54/100, batch:   578/  792, ite: 58957] train loss: 4.633149, tar: 0.477807 
l0: 0.599129, l1: 0.629761, l2: 0.626488, l3: 0.627676, l4: 0.628710, l5: 0.700422, l6: 0.845575

[epoch:  54/100, batch:   580/  792, ite: 58958] train loss: 4.634113, tar: 0.477933 
l0: 0.487185, l1: 0.492210, l2: 0.491119, l3: 0.484307, l4: 0.536328, l5: 0.630416, l6: 0.848022

[epoch:  54/100, batch:   582/  792, ite: 58959] train loss: 4.634327, tar: 0.477943 
l0: 0.310886, l1: 0.312778, l2: 0.314894, l3: 0.317958, l4: 0.333568, l5: 0.465424, l6: 0.583604

[epoch:  54/100, batch:   584/  792, ite: 58960] train loss: 4.632967, tar: 0.477769 
l0: 0.735465, l1: 0.733414, l2: 0.734434, l3: 0.735575, l4: 0.756636, l5: 0.837288, l6: 1.015492

[epoch:  54/100, batch:   586/  792, ite: 58961] train loss: 4.634942, tar: 0.478037 
l0: 0.291621, l1: 0.296779, l2: 0.299186, l3: 0.304530, l4: 0.336109, l5: 0.511294, l6: 0.684568

[epoch:  54/100, batch:   588/  792, ite: 58962] train loss: 4.633738, tar: 0.47

[epoch:  54/100, batch:   668/  792, ite: 59002] train loss: 4.629791, tar: 0.477860 
l0: 0.252512, l1: 0.258244, l2: 0.257242, l3: 0.278893, l4: 0.318482, l5: 0.407030, l6: 0.683435

[epoch:  54/100, batch:   670/  792, ite: 59003] train loss: 4.628252, tar: 0.477635 
l0: 0.637688, l1: 0.640157, l2: 0.647263, l3: 0.651648, l4: 0.668023, l5: 0.711760, l6: 0.885286

[epoch:  54/100, batch:   672/  792, ite: 59004] train loss: 4.629476, tar: 0.477795 
l0: 0.489943, l1: 0.494101, l2: 0.494776, l3: 0.503577, l4: 0.549965, l5: 0.663513, l6: 0.766570

[epoch:  54/100, batch:   674/  792, ite: 59005] train loss: 4.629634, tar: 0.477807 
l0: 0.539309, l1: 0.530894, l2: 0.533188, l3: 0.550300, l4: 0.603688, l5: 0.757818, l6: 0.898939

[epoch:  54/100, batch:   676/  792, ite: 59006] train loss: 4.630352, tar: 0.477868 
l0: 0.271186, l1: 0.272920, l2: 0.275079, l3: 0.278085, l4: 0.308435, l5: 0.398447, l6: 0.508609

[epoch:  54/100, batch:   678/  792, ite: 59007] train loss: 4.628631, tar: 0.47

[epoch:  54/100, batch:   758/  792, ite: 59047] train loss: 4.640611, tar: 0.479390 
l0: 0.352398, l1: 0.353812, l2: 0.351328, l3: 0.354077, l4: 0.408645, l5: 0.515680, l6: 0.883035

[epoch:  54/100, batch:   760/  792, ite: 59048] train loss: 4.640185, tar: 0.479268 
l0: 0.734759, l1: 0.742418, l2: 0.743167, l3: 0.754566, l4: 0.804481, l5: 0.950524, l6: 1.374223

[epoch:  54/100, batch:   762/  792, ite: 59049] train loss: 4.642909, tar: 0.479512 
l0: 0.334865, l1: 0.336857, l2: 0.333554, l3: 0.340036, l4: 0.373934, l5: 0.469106, l6: 0.596315

[epoch:  54/100, batch:   764/  792, ite: 59050] train loss: 4.641744, tar: 0.479374 
l0: 0.650119, l1: 0.651744, l2: 0.656266, l3: 0.670906, l4: 0.694006, l5: 0.706217, l6: 0.842497

[epoch:  54/100, batch:   766/  792, ite: 59051] train loss: 4.642798, tar: 0.479537 
l0: 0.605832, l1: 0.610420, l2: 0.608536, l3: 0.612045, l4: 0.656160, l5: 0.773663, l6: 1.012199

[epoch:  54/100, batch:   768/  792, ite: 59052] train loss: 4.643997, tar: 0.47

l0: 0.504028, l1: 0.506097, l2: 0.506434, l3: 0.509088, l4: 0.536281, l5: 0.614335, l6: 0.745844

[epoch:  55/100, batch:    56/  792, ite: 59092] train loss: 4.639499, tar: 0.479141 
l0: 0.505193, l1: 0.513611, l2: 0.511945, l3: 0.505805, l4: 0.500058, l5: 0.616036, l6: 0.814743

[epoch:  55/100, batch:    58/  792, ite: 59093] train loss: 4.639761, tar: 0.479165 
l0: 0.916987, l1: 0.921416, l2: 0.922889, l3: 0.925081, l4: 0.949260, l5: 1.024069, l6: 0.998255

[epoch:  55/100, batch:    60/  792, ite: 59094] train loss: 4.642563, tar: 0.479565 
l0: 0.617561, l1: 0.623119, l2: 0.624690, l3: 0.636899, l4: 0.692955, l5: 0.750796, l6: 0.966946

[epoch:  55/100, batch:    62/  792, ite: 59095] train loss: 4.643856, tar: 0.479691 
l0: 0.616216, l1: 0.613084, l2: 0.611635, l3: 0.624732, l4: 0.642619, l5: 0.773452, l6: 0.873330

[epoch:  55/100, batch:    64/  792, ite: 59096] train loss: 4.644950, tar: 0.479816 
l0: 0.508759, l1: 0.519028, l2: 0.517089, l3: 0.532172, l4: 0.539649, l5: 0.6138

[epoch:  55/100, batch:   144/  792, ite: 59136] train loss: 4.644744, tar: 0.480042 
l0: 0.353526, l1: 0.355155, l2: 0.353859, l3: 0.359752, l4: 0.376207, l5: 0.455367, l6: 0.607035

[epoch:  55/100, batch:   146/  792, ite: 59137] train loss: 4.643716, tar: 0.479931 
l0: 1.229000, l1: 1.242383, l2: 1.232325, l3: 1.239983, l4: 1.262534, l5: 1.267998, l6: 1.287212

[epoch:  55/100, batch:   148/  792, ite: 59138] train loss: 4.648658, tar: 0.480589 
l0: 0.498156, l1: 0.499277, l2: 0.501741, l3: 0.504848, l4: 0.529796, l5: 0.657533, l6: 0.852986

[epoch:  55/100, batch:   150/  792, ite: 59139] train loss: 4.648863, tar: 0.480605 
l0: 0.447572, l1: 0.448778, l2: 0.449570, l3: 0.459360, l4: 0.471446, l5: 0.572042, l6: 0.686380

[epoch:  55/100, batch:   152/  792, ite: 59140] train loss: 4.648550, tar: 0.480576 
l0: 0.942584, l1: 0.951116, l2: 0.954374, l3: 0.955752, l4: 0.965396, l5: 0.954812, l6: 1.082524

[epoch:  55/100, batch:   154/  792, ite: 59141] train loss: 4.651436, tar: 0.48

[epoch:  55/100, batch:   234/  792, ite: 59181] train loss: 4.651615, tar: 0.481023 
l0: 0.951552, l1: 0.972071, l2: 0.970465, l3: 0.960147, l4: 0.953218, l5: 1.016902, l6: 1.185366

[epoch:  55/100, batch:   236/  792, ite: 59182] train loss: 4.654722, tar: 0.481421 
l0: 0.180009, l1: 0.179956, l2: 0.182826, l3: 0.196310, l4: 0.266201, l5: 0.354138, l6: 0.481805

[epoch:  55/100, batch:   238/  792, ite: 59183] train loss: 4.652823, tar: 0.481166 
l0: 0.371710, l1: 0.377315, l2: 0.373454, l3: 0.377828, l4: 0.380574, l5: 0.466508, l6: 0.642149

[epoch:  55/100, batch:   240/  792, ite: 59184] train loss: 4.651948, tar: 0.481074 
l0: 0.511806, l1: 0.515965, l2: 0.521117, l3: 0.518004, l4: 0.532242, l5: 0.601508, l6: 0.909750

[epoch:  55/100, batch:   242/  792, ite: 59185] train loss: 4.652256, tar: 0.481100 
l0: 0.612241, l1: 0.618645, l2: 0.618750, l3: 0.623125, l4: 0.635630, l5: 0.618546, l6: 0.853991

[epoch:  55/100, batch:   244/  792, ite: 59186] train loss: 4.652889, tar: 0.48

[epoch:  55/100, batch:   324/  792, ite: 59226] train loss: 4.650220, tar: 0.480571 
l0: 0.421054, l1: 0.423339, l2: 0.419895, l3: 0.427941, l4: 0.430400, l5: 0.549662, l6: 0.615237

[epoch:  55/100, batch:   326/  792, ite: 59227] train loss: 4.649638, tar: 0.480523 
l0: 0.235753, l1: 0.235156, l2: 0.236576, l3: 0.243019, l4: 0.269224, l5: 0.259385, l6: 0.443741

[epoch:  55/100, batch:   328/  792, ite: 59228] train loss: 4.647817, tar: 0.480323 
l0: 0.240462, l1: 0.245028, l2: 0.247532, l3: 0.246880, l4: 0.267500, l5: 0.343855, l6: 0.550288

[epoch:  55/100, batch:   330/  792, ite: 59229] train loss: 4.646322, tar: 0.480128 
l0: 0.265715, l1: 0.270394, l2: 0.272964, l3: 0.286161, l4: 0.326335, l5: 0.462226, l6: 0.576785

[epoch:  55/100, batch:   332/  792, ite: 59230] train loss: 4.645034, tar: 0.479954 
l0: 0.351772, l1: 0.353815, l2: 0.354564, l3: 0.365796, l4: 0.406059, l5: 0.498208, l6: 0.611344

[epoch:  55/100, batch:   334/  792, ite: 59231] train loss: 4.644193, tar: 0.47

[epoch:  55/100, batch:   414/  792, ite: 59271] train loss: 4.661226, tar: 0.482154 
l0: 0.498740, l1: 0.500702, l2: 0.501435, l3: 0.506933, l4: 0.525642, l5: 0.583314, l6: 0.664161

[epoch:  55/100, batch:   416/  792, ite: 59272] train loss: 4.661087, tar: 0.482167 
l0: 0.551907, l1: 0.555430, l2: 0.557827, l3: 0.556289, l4: 0.587887, l5: 0.736936, l6: 0.878342

[epoch:  55/100, batch:   418/  792, ite: 59273] train loss: 4.661707, tar: 0.482221 
l0: 0.303676, l1: 0.309417, l2: 0.311573, l3: 0.317232, l4: 0.330660, l5: 0.450889, l6: 0.613476

[epoch:  55/100, batch:   420/  792, ite: 59274] train loss: 4.660713, tar: 0.482081 
l0: 0.395018, l1: 0.393422, l2: 0.400256, l3: 0.407986, l4: 0.409032, l5: 0.444520, l6: 0.596259

[epoch:  55/100, batch:   422/  792, ite: 59275] train loss: 4.659953, tar: 0.482013 
l0: 0.202898, l1: 0.200623, l2: 0.202987, l3: 0.211613, l4: 0.247426, l5: 0.330580, l6: 0.528488

[epoch:  55/100, batch:   424/  792, ite: 59276] train loss: 4.658249, tar: 0.48

[epoch:  55/100, batch:   504/  792, ite: 59316] train loss: 4.672116, tar: 0.483341 
l0: 0.477433, l1: 0.483288, l2: 0.481812, l3: 0.487579, l4: 0.538682, l5: 0.603814, l6: 0.746264

[epoch:  55/100, batch:   506/  792, ite: 59317] train loss: 4.672032, tar: 0.483336 
l0: 0.309747, l1: 0.309717, l2: 0.312330, l3: 0.317089, l4: 0.362692, l5: 0.484988, l6: 0.606318

[epoch:  55/100, batch:   508/  792, ite: 59318] train loss: 4.671028, tar: 0.483205 
l0: 0.451406, l1: 0.454841, l2: 0.451361, l3: 0.459932, l4: 0.497044, l5: 0.535215, l6: 0.720488

[epoch:  55/100, batch:   510/  792, ite: 59319] train loss: 4.670749, tar: 0.483181 
l0: 0.292103, l1: 0.294946, l2: 0.292909, l3: 0.300863, l4: 0.323252, l5: 0.365539, l6: 0.513513

[epoch:  55/100, batch:   512/  792, ite: 59320] train loss: 4.669470, tar: 0.483036 
l0: 0.364971, l1: 0.361267, l2: 0.368316, l3: 0.381779, l4: 0.428749, l5: 0.545538, l6: 0.693012

[epoch:  55/100, batch:   514/  792, ite: 59321] train loss: 4.668926, tar: 0.48

[epoch:  55/100, batch:   594/  792, ite: 59361] train loss: 4.662560, tar: 0.482130 
l0: 0.854258, l1: 0.897764, l2: 0.893682, l3: 0.900288, l4: 0.946750, l5: 0.981636, l6: 0.970534

[epoch:  55/100, batch:   596/  792, ite: 59362] train loss: 4.664710, tar: 0.482403 
l0: 0.380552, l1: 0.387272, l2: 0.383715, l3: 0.380800, l4: 0.406236, l5: 0.467853, l6: 0.952877

[epoch:  55/100, batch:   598/  792, ite: 59363] train loss: 4.664466, tar: 0.482329 
l0: 0.666278, l1: 0.661405, l2: 0.665044, l3: 0.683175, l4: 0.701781, l5: 0.826995, l6: 0.895144

[epoch:  55/100, batch:   600/  792, ite: 59364] train loss: 4.665501, tar: 0.482463 
l0: 0.275969, l1: 0.275520, l2: 0.275666, l3: 0.276794, l4: 0.301197, l5: 0.419866, l6: 0.518733

[epoch:  55/100, batch:   602/  792, ite: 59365] train loss: 4.664204, tar: 0.482312 
l0: 0.267644, l1: 0.269767, l2: 0.268288, l3: 0.286004, l4: 0.319192, l5: 0.475542, l6: 0.727919

[epoch:  55/100, batch:   604/  792, ite: 59366] train loss: 4.663311, tar: 0.48

[epoch:  55/100, batch:   684/  792, ite: 59406] train loss: 4.663121, tar: 0.482186 
l0: 0.412381, l1: 0.411392, l2: 0.412229, l3: 0.415728, l4: 0.461338, l5: 0.593557, l6: 0.744998

[epoch:  55/100, batch:   686/  792, ite: 59407] train loss: 4.662847, tar: 0.482137 
l0: 0.335774, l1: 0.336110, l2: 0.335903, l3: 0.336989, l4: 0.345320, l5: 0.404483, l6: 0.575440

[epoch:  55/100, batch:   688/  792, ite: 59408] train loss: 4.661836, tar: 0.482033 
l0: 0.423147, l1: 0.429896, l2: 0.426684, l3: 0.418623, l4: 0.450618, l5: 0.573619, l6: 0.723682

[epoch:  55/100, batch:   690/  792, ite: 59409] train loss: 4.661604, tar: 0.481991 
l0: 0.533826, l1: 0.537465, l2: 0.535109, l3: 0.541527, l4: 0.557993, l5: 0.625855, l6: 0.617054

[epoch:  55/100, batch:   692/  792, ite: 59410] train loss: 4.661529, tar: 0.482028 
l0: 0.357403, l1: 0.359828, l2: 0.359902, l3: 0.373344, l4: 0.391848, l5: 0.514808, l6: 0.653024

[epoch:  55/100, batch:   694/  792, ite: 59411] train loss: 4.660851, tar: 0.48

[epoch:  55/100, batch:   774/  792, ite: 59451] train loss: 4.647496, tar: 0.480196 
l0: 0.322206, l1: 0.321951, l2: 0.317573, l3: 0.310449, l4: 0.330690, l5: 0.453018, l6: 0.741227

[epoch:  55/100, batch:   776/  792, ite: 59452] train loss: 4.646813, tar: 0.480088 
l0: 0.455678, l1: 0.451610, l2: 0.455934, l3: 0.458092, l4: 0.497533, l5: 0.691781, l6: 0.892261

[epoch:  55/100, batch:   778/  792, ite: 59453] train loss: 4.646923, tar: 0.480071 
l0: 0.533049, l1: 0.538395, l2: 0.539271, l3: 0.544545, l4: 0.559191, l5: 0.714492, l6: 0.840888

[epoch:  55/100, batch:   780/  792, ite: 59454] train loss: 4.647290, tar: 0.480107 
l0: 0.992706, l1: 0.996687, l2: 0.998531, l3: 1.003045, l4: 1.043377, l5: 1.013939, l6: 1.053016

[epoch:  55/100, batch:   782/  792, ite: 59455] train loss: 4.649723, tar: 0.480460 
l0: 0.353169, l1: 0.353808, l2: 0.355602, l3: 0.358923, l4: 0.380114, l5: 0.434227, l6: 0.608747

[epoch:  55/100, batch:   784/  792, ite: 59456] train loss: 4.648964, tar: 0.48

l0: 0.260482, l1: 0.257490, l2: 0.258797, l3: 0.270408, l4: 0.313249, l5: 0.398523, l6: 0.517050

[epoch:  56/100, batch:    72/  792, ite: 59496] train loss: 4.654031, tar: 0.481095 
l0: 0.258049, l1: 0.256603, l2: 0.259719, l3: 0.267596, l4: 0.298507, l5: 0.490202, l6: 0.601593

[epoch:  56/100, batch:    74/  792, ite: 59497] train loss: 4.653020, tar: 0.480946 
l0: 0.294599, l1: 0.294189, l2: 0.299198, l3: 0.301617, l4: 0.346617, l5: 0.426581, l6: 0.542367

[epoch:  56/100, batch:    76/  792, ite: 59498] train loss: 4.651997, tar: 0.480821 
l0: 0.554377, l1: 0.554750, l2: 0.556739, l3: 0.556107, l4: 0.550633, l5: 0.576367, l6: 0.714600

[epoch:  56/100, batch:    78/  792, ite: 59499] train loss: 4.652164, tar: 0.480871 
l0: 0.305084, l1: 0.307166, l2: 0.307959, l3: 0.309496, l4: 0.349833, l5: 0.494864, l6: 0.619480

[epoch:  56/100, batch:    80/  792, ite: 59500] train loss: 4.651299, tar: 0.480753 
l0: 1.026170, l1: 1.029908, l2: 1.025659, l3: 1.029789, l4: 1.106166, l5: 1.1939

[epoch:  56/100, batch:   160/  792, ite: 59540] train loss: 4.654275, tar: 0.481140 
l0: 0.457592, l1: 0.461025, l2: 0.457562, l3: 0.459206, l4: 0.448971, l5: 0.529262, l6: 0.661839

[epoch:  56/100, batch:   162/  792, ite: 59541] train loss: 4.654010, tar: 0.481124 
l0: 0.406989, l1: 0.418775, l2: 0.414641, l3: 0.406556, l4: 0.425027, l5: 0.509559, l6: 0.559504

[epoch:  56/100, batch:   164/  792, ite: 59542] train loss: 4.653370, tar: 0.481076 
l0: 0.469518, l1: 0.468278, l2: 0.468792, l3: 0.469011, l4: 0.481144, l5: 0.581836, l6: 0.754004

[epoch:  56/100, batch:   166/  792, ite: 59543] train loss: 4.653276, tar: 0.481069 
l0: 0.587225, l1: 0.582217, l2: 0.583848, l3: 0.585309, l4: 0.614957, l5: 0.688847, l6: 0.837250

[epoch:  56/100, batch:   168/  792, ite: 59544] train loss: 4.653733, tar: 0.481138 
l0: 0.340632, l1: 0.343677, l2: 0.342927, l3: 0.347903, l4: 0.381951, l5: 0.497275, l6: 0.807330

[epoch:  56/100, batch:   170/  792, ite: 59545] train loss: 4.653217, tar: 0.48

[epoch:  56/100, batch:   250/  792, ite: 59585] train loss: 4.646982, tar: 0.480188 
l0: 0.353317, l1: 0.356394, l2: 0.355387, l3: 0.353006, l4: 0.379223, l5: 0.430290, l6: 0.605744

[epoch:  56/100, batch:   252/  792, ite: 59586] train loss: 4.646228, tar: 0.480108 
l0: 0.320790, l1: 0.327317, l2: 0.329427, l3: 0.336552, l4: 0.347347, l5: 0.492843, l6: 0.686942

[epoch:  56/100, batch:   254/  792, ite: 59587] train loss: 4.645603, tar: 0.480007 
l0: 0.506176, l1: 0.509365, l2: 0.507593, l3: 0.511683, l4: 0.546381, l5: 0.575644, l6: 0.842946

[epoch:  56/100, batch:   256/  792, ite: 59588] train loss: 4.645836, tar: 0.480024 
l0: 1.083517, l1: 1.076146, l2: 1.072602, l3: 1.085887, l4: 1.120069, l5: 1.238611, l6: 1.583470

[epoch:  56/100, batch:   258/  792, ite: 59589] train loss: 4.649098, tar: 0.480404 
l0: 0.601465, l1: 0.601098, l2: 0.605351, l3: 0.599147, l4: 0.628293, l5: 0.700949, l6: 0.968301

[epoch:  56/100, batch:   260/  792, ite: 59590] train loss: 4.649738, tar: 0.48

[epoch:  56/100, batch:   340/  792, ite: 59630] train loss: 4.636784, tar: 0.478775 
l0: 0.387516, l1: 0.394291, l2: 0.392835, l3: 0.399457, l4: 0.405214, l5: 0.498163, l6: 0.580943

[epoch:  56/100, batch:   342/  792, ite: 59631] train loss: 4.636319, tar: 0.478720 
l0: 0.610457, l1: 0.611817, l2: 0.615221, l3: 0.618703, l4: 0.654118, l5: 0.750670, l6: 0.979102

[epoch:  56/100, batch:   344/  792, ite: 59632] train loss: 4.637160, tar: 0.478800 
l0: 0.821256, l1: 0.819765, l2: 0.820275, l3: 0.816071, l4: 0.840593, l5: 0.933809, l6: 1.179459

[epoch:  56/100, batch:   346/  792, ite: 59633] train loss: 4.638870, tar: 0.479010 
l0: 0.591774, l1: 0.592493, l2: 0.587835, l3: 0.596424, l4: 0.595920, l5: 0.620992, l6: 0.834373

[epoch:  56/100, batch:   348/  792, ite: 59634] train loss: 4.639321, tar: 0.479079 
l0: 0.519833, l1: 0.517956, l2: 0.522126, l3: 0.532215, l4: 0.547305, l5: 0.598667, l6: 0.636179

[epoch:  56/100, batch:   350/  792, ite: 59635] train loss: 4.639305, tar: 0.47

[epoch:  56/100, batch:   430/  792, ite: 59675] train loss: 4.638287, tar: 0.479261 
l0: 0.317886, l1: 0.320860, l2: 0.320295, l3: 0.323591, l4: 0.363940, l5: 0.480170, l6: 0.552860

[epoch:  56/100, batch:   432/  792, ite: 59676] train loss: 4.637469, tar: 0.479165 
l0: 0.717213, l1: 0.725959, l2: 0.724409, l3: 0.720557, l4: 0.740775, l5: 0.832930, l6: 1.046540

[epoch:  56/100, batch:   434/  792, ite: 59677] train loss: 4.638668, tar: 0.479307 
l0: 0.422557, l1: 0.430104, l2: 0.429805, l3: 0.437016, l4: 0.435235, l5: 0.600678, l6: 0.722153

[epoch:  56/100, batch:   436/  792, ite: 59678] train loss: 4.638450, tar: 0.479273 
l0: 0.317868, l1: 0.318838, l2: 0.320974, l3: 0.327352, l4: 0.315617, l5: 0.444723, l6: 0.648903

[epoch:  56/100, batch:   438/  792, ite: 59679] train loss: 4.637700, tar: 0.479177 
l0: 0.353084, l1: 0.351703, l2: 0.351877, l3: 0.361600, l4: 0.365922, l5: 0.407661, l6: 0.478497

[epoch:  56/100, batch:   440/  792, ite: 59680] train loss: 4.636945, tar: 0.47

[epoch:  56/100, batch:   520/  792, ite: 59720] train loss: 4.639832, tar: 0.479512 
l0: 0.701417, l1: 0.708973, l2: 0.706556, l3: 0.700573, l4: 0.731636, l5: 0.861613, l6: 1.023957

[epoch:  56/100, batch:   522/  792, ite: 59721] train loss: 4.640987, tar: 0.479641 
l0: 0.830093, l1: 0.844864, l2: 0.853442, l3: 0.859469, l4: 0.874989, l5: 0.923448, l6: 0.991727

[epoch:  56/100, batch:   524/  792, ite: 59722] train loss: 4.642524, tar: 0.479844 
l0: 0.784214, l1: 0.791972, l2: 0.793020, l3: 0.783179, l4: 0.820317, l5: 0.912004, l6: 1.074873

[epoch:  56/100, batch:   526/  792, ite: 59723] train loss: 4.643988, tar: 0.480021 
l0: 0.251638, l1: 0.250415, l2: 0.251727, l3: 0.246803, l4: 0.266391, l5: 0.340549, l6: 0.507202

[epoch:  56/100, batch:   528/  792, ite: 59724] train loss: 4.642826, tar: 0.479888 
l0: 0.694961, l1: 0.693537, l2: 0.692864, l3: 0.701403, l4: 0.726637, l5: 0.835707, l6: 1.012138

[epoch:  56/100, batch:   530/  792, ite: 59725] train loss: 4.643874, tar: 0.48

[epoch:  56/100, batch:   610/  792, ite: 59765] train loss: 4.634077, tar: 0.478903 
l0: 0.227638, l1: 0.227996, l2: 0.229913, l3: 0.227296, l4: 0.252200, l5: 0.286926, l6: 0.388263

[epoch:  56/100, batch:   612/  792, ite: 59766] train loss: 4.632720, tar: 0.478761 
l0: 0.530538, l1: 0.526212, l2: 0.523717, l3: 0.519688, l4: 0.528055, l5: 0.604186, l6: 0.766923

[epoch:  56/100, batch:   614/  792, ite: 59767] train loss: 4.632786, tar: 0.478790 
l0: 0.580299, l1: 0.584043, l2: 0.588502, l3: 0.589713, l4: 0.594554, l5: 0.685494, l6: 0.752617

[epoch:  56/100, batch:   616/  792, ite: 59768] train loss: 4.633115, tar: 0.478847 
l0: 0.546901, l1: 0.553358, l2: 0.556663, l3: 0.563657, l4: 0.585583, l5: 0.701833, l6: 0.785023

[epoch:  56/100, batch:   618/  792, ite: 59769] train loss: 4.633398, tar: 0.478886 
l0: 0.415688, l1: 0.419736, l2: 0.422706, l3: 0.427456, l4: 0.443751, l5: 0.535051, l6: 0.682144

[epoch:  56/100, batch:   620/  792, ite: 59770] train loss: 4.633144, tar: 0.47

[epoch:  56/100, batch:   700/  792, ite: 59810] train loss: 4.637165, tar: 0.479208 
l0: 0.605991, l1: 0.606288, l2: 0.607700, l3: 0.603045, l4: 0.639715, l5: 0.726762, l6: 0.985056

[epoch:  56/100, batch:   702/  792, ite: 59811] train loss: 4.637916, tar: 0.479278 
l0: 0.373012, l1: 0.371141, l2: 0.365944, l3: 0.368067, l4: 0.412258, l5: 0.498994, l6: 0.807520

[epoch:  56/100, batch:   704/  792, ite: 59812] train loss: 4.637642, tar: 0.479219 
l0: 0.813672, l1: 0.805954, l2: 0.823780, l3: 0.840878, l4: 0.836168, l5: 0.839991, l6: 1.025567

[epoch:  56/100, batch:   706/  792, ite: 59813] train loss: 4.638886, tar: 0.479403 
l0: 0.499421, l1: 0.501029, l2: 0.501889, l3: 0.505786, l4: 0.525647, l5: 0.696901, l6: 0.921349

[epoch:  56/100, batch:   708/  792, ite: 59814] train loss: 4.639146, tar: 0.479415 
l0: 0.678491, l1: 0.681048, l2: 0.682474, l3: 0.705309, l4: 0.780334, l5: 0.941309, l6: 1.440490

[epoch:  56/100, batch:   710/  792, ite: 59815] train loss: 4.640651, tar: 0.47

[epoch:  56/100, batch:   790/  792, ite: 59855] train loss: 4.635762, tar: 0.478914 
l0: 0.323278, l1: 0.327271, l2: 0.325742, l3: 0.334135, l4: 0.357402, l5: 0.505172, l6: 0.652147

[epoch:  56/100, batch:   792/  792, ite: 59856] train loss: 4.635160, tar: 0.478830 
Starting epoch 57
Epoch 57 loading complete
l0: 0.714934, l1: 0.715173, l2: 0.713140, l3: 0.715875, l4: 0.746262, l5: 0.882974, l6: 0.936097

[epoch:  57/100, batch:     2/  792, ite: 59857] train loss: 4.636123, tar: 0.478957 
l0: 0.316529, l1: 0.319394, l2: 0.322503, l3: 0.329267, l4: 0.389020, l5: 0.502361, l6: 0.593428

[epoch:  57/100, batch:     4/  792, ite: 59858] train loss: 4.635454, tar: 0.478870 
l0: 0.376839, l1: 0.379026, l2: 0.382699, l3: 0.391288, l4: 0.409333, l5: 0.526224, l6: 0.757106

[epoch:  57/100, batch:     6/  792, ite: 59859] train loss: 4.635117, tar: 0.478815 
l0: 0.334910, l1: 0.332820, l2: 0.331059, l3: 0.332474, l4: 0.382680, l5: 0.533267, l6: 0.778929

[epoch:  57/100, batch:     8/  792,

l0: 0.569485, l1: 0.569866, l2: 0.567193, l3: 0.565744, l4: 0.596352, l5: 0.680132, l6: 0.802143

[epoch:  57/100, batch:    88/  792, ite: 59900] train loss: 4.633537, tar: 0.478544 
l0: 0.261680, l1: 0.262246, l2: 0.265519, l3: 0.269135, l4: 0.332568, l5: 0.502689, l6: 0.690401

[epoch:  57/100, batch:    90/  792, ite: 59901] train loss: 4.632833, tar: 0.478430 
l0: 0.588461, l1: 0.590192, l2: 0.588231, l3: 0.597093, l4: 0.639513, l5: 0.725385, l6: 0.795522

[epoch:  57/100, batch:    92/  792, ite: 59902] train loss: 4.633223, tar: 0.478488 
l0: 0.298293, l1: 0.289923, l2: 0.286033, l3: 0.282062, l4: 0.294053, l5: 0.344867, l6: 0.442766

[epoch:  57/100, batch:    94/  792, ite: 59903] train loss: 4.632218, tar: 0.478393 
l0: 0.336498, l1: 0.336864, l2: 0.333625, l3: 0.336082, l4: 0.344465, l5: 0.428348, l6: 0.548469

[epoch:  57/100, batch:    96/  792, ite: 59904] train loss: 4.631450, tar: 0.478319 
l0: 0.272129, l1: 0.277688, l2: 0.276907, l3: 0.280529, l4: 0.290141, l5: 0.3599

[epoch:  57/100, batch:   176/  792, ite: 59944] train loss: 4.625352, tar: 0.477674 
l0: 1.474701, l1: 1.473611, l2: 1.506638, l3: 1.515748, l4: 1.518438, l5: 1.489470, l6: 1.347598

[epoch:  57/100, batch:   178/  792, ite: 59945] train loss: 4.629132, tar: 0.478186 
l0: 0.167107, l1: 0.166749, l2: 0.166303, l3: 0.171398, l4: 0.196711, l5: 0.262329, l6: 0.381406

[epoch:  57/100, batch:   180/  792, ite: 59946] train loss: 4.627738, tar: 0.478026 
l0: 0.725383, l1: 0.724346, l2: 0.719432, l3: 0.714864, l4: 0.744677, l5: 0.841165, l6: 0.868869

[epoch:  57/100, batch:   182/  792, ite: 59947] train loss: 4.628552, tar: 0.478153 
l0: 0.381470, l1: 0.382273, l2: 0.385068, l3: 0.400932, l4: 0.438837, l5: 0.568915, l6: 0.836337

[epoch:  57/100, batch:   184/  792, ite: 59948] train loss: 4.628366, tar: 0.478104 
l0: 0.370906, l1: 0.370504, l2: 0.370511, l3: 0.376433, l4: 0.403418, l5: 0.468038, l6: 0.790584

[epoch:  57/100, batch:   186/  792, ite: 59949] train loss: 4.627994, tar: 0.47

[epoch:  57/100, batch:   266/  792, ite: 59989] train loss: 4.619594, tar: 0.477012 
l0: 0.401891, l1: 0.404550, l2: 0.403130, l3: 0.404702, l4: 0.398651, l5: 0.489139, l6: 0.625606

[epoch:  57/100, batch:   268/  792, ite: 59990] train loss: 4.619169, tar: 0.476974 
l0: 0.571192, l1: 0.574043, l2: 0.572741, l3: 0.577035, l4: 0.576734, l5: 0.741593, l6: 0.947354

[epoch:  57/100, batch:   270/  792, ite: 59991] train loss: 4.619662, tar: 0.477021 
l0: 0.373520, l1: 0.376076, l2: 0.376487, l3: 0.397677, l4: 0.440323, l5: 0.502399, l6: 0.756112

[epoch:  57/100, batch:   272/  792, ite: 59992] train loss: 4.619417, tar: 0.476969 
l0: 0.367498, l1: 0.359592, l2: 0.360910, l3: 0.368996, l4: 0.397904, l5: 0.429492, l6: 0.569829

[epoch:  57/100, batch:   274/  792, ite: 59993] train loss: 4.618832, tar: 0.476914 
l0: 0.295247, l1: 0.294851, l2: 0.292426, l3: 0.284807, l4: 0.299324, l5: 0.432633, l6: 0.591318

[epoch:  57/100, batch:   276/  792, ite: 59994] train loss: 4.618063, tar: 0.47

[epoch:  57/100, batch:   356/  792, ite: 60034] train loss: 4.365869, tar: 0.440496 
l0: 0.457778, l1: 0.451740, l2: 0.453607, l3: 0.447265, l4: 0.459811, l5: 0.575392, l6: 0.810815

[epoch:  57/100, batch:   358/  792, ite: 60035] train loss: 4.369700, tar: 0.440990 
l0: 0.398475, l1: 0.398415, l2: 0.400017, l3: 0.411433, l4: 0.402384, l5: 0.471955, l6: 0.622955

[epoch:  57/100, batch:   360/  792, ite: 60036] train loss: 4.356276, tar: 0.439809 
l0: 0.728666, l1: 0.729172, l2: 0.723915, l3: 0.732135, l4: 0.748146, l5: 0.797157, l6: 0.967457

[epoch:  57/100, batch:   362/  792, ite: 60037] train loss: 4.414004, tar: 0.447616 
l0: 0.283334, l1: 0.290192, l2: 0.287899, l3: 0.287445, l4: 0.319137, l5: 0.487536, l6: 0.665460

[epoch:  57/100, batch:   364/  792, ite: 60038] train loss: 4.386521, tar: 0.443292 
l0: 0.652102, l1: 0.658517, l2: 0.657114, l3: 0.672953, l4: 0.706200, l5: 0.807840, l6: 0.880528

[epoch:  57/100, batch:   366/  792, ite: 60039] train loss: 4.427613, tar: 0.44

[epoch:  57/100, batch:   446/  792, ite: 60079] train loss: 4.570456, tar: 0.468969 
l0: 0.620760, l1: 0.627415, l2: 0.625886, l3: 0.632010, l4: 0.648930, l5: 0.692567, l6: 0.819741

[epoch:  57/100, batch:   448/  792, ite: 60080] train loss: 4.582204, tar: 0.470866 
l0: 0.262495, l1: 0.261661, l2: 0.266421, l3: 0.273962, l4: 0.319193, l5: 0.437275, l6: 0.527130

[epoch:  57/100, batch:   450/  792, ite: 60081] train loss: 4.562153, tar: 0.468294 
l0: 0.210643, l1: 0.218152, l2: 0.221500, l3: 0.229513, l4: 0.268295, l5: 0.372045, l6: 0.512363

[epoch:  57/100, batch:   452/  792, ite: 60082] train loss: 4.538046, tar: 0.465152 
l0: 0.456931, l1: 0.467688, l2: 0.472964, l3: 0.481794, l4: 0.511309, l5: 0.683436, l6: 0.898620

[epoch:  57/100, batch:   454/  792, ite: 60083] train loss: 4.542898, tar: 0.465053 
l0: 0.286404, l1: 0.295120, l2: 0.296734, l3: 0.308097, l4: 0.321906, l5: 0.408703, l6: 0.542362

[epoch:  57/100, batch:   456/  792, ite: 60084] train loss: 4.526004, tar: 0.46

[epoch:  57/100, batch:   536/  792, ite: 60124] train loss: 4.545067, tar: 0.466703 
l0: 0.318874, l1: 0.326975, l2: 0.329171, l3: 0.337757, l4: 0.367654, l5: 0.457041, l6: 0.590254

[epoch:  57/100, batch:   538/  792, ite: 60125] train loss: 4.535723, tar: 0.465520 
l0: 0.511297, l1: 0.515659, l2: 0.515154, l3: 0.517424, l4: 0.537665, l5: 0.603732, l6: 0.821349

[epoch:  57/100, batch:   540/  792, ite: 60126] train loss: 4.538903, tar: 0.465884 
l0: 0.489511, l1: 0.487698, l2: 0.490668, l3: 0.492153, l4: 0.498456, l5: 0.563311, l6: 0.678238

[epoch:  57/100, batch:   542/  792, ite: 60127] train loss: 4.538608, tar: 0.466070 
l0: 0.371386, l1: 0.378160, l2: 0.377655, l3: 0.388692, l4: 0.432297, l5: 0.577389, l6: 0.904182

[epoch:  57/100, batch:   544/  792, ite: 60128] train loss: 4.536902, tar: 0.465330 
l0: 0.364062, l1: 0.366081, l2: 0.366120, l3: 0.376306, l4: 0.401273, l5: 0.560707, l6: 0.804595

[epoch:  57/100, batch:   546/  792, ite: 60129] train loss: 4.532947, tar: 0.46

[epoch:  57/100, batch:   626/  792, ite: 60169] train loss: 4.542765, tar: 0.462934 
l0: 0.343279, l1: 0.345497, l2: 0.347704, l3: 0.352016, l4: 0.389466, l5: 0.560876, l6: 0.657065

[epoch:  57/100, batch:   628/  792, ite: 60170] train loss: 4.538377, tar: 0.462230 
l0: 0.981533, l1: 0.988818, l2: 0.996105, l3: 0.997578, l4: 1.045218, l5: 1.171171, l6: 1.517613

[epoch:  57/100, batch:   630/  792, ite: 60171] train loss: 4.565172, tar: 0.465267 
l0: 0.525973, l1: 0.525125, l2: 0.523274, l3: 0.526845, l4: 0.544004, l5: 0.647149, l6: 0.763911

[epoch:  57/100, batch:   632/  792, ite: 60172] train loss: 4.567092, tar: 0.465620 
l0: 0.368824, l1: 0.365787, l2: 0.363252, l3: 0.361591, l4: 0.405809, l5: 0.523775, l6: 0.722226

[epoch:  57/100, batch:   634/  792, ite: 60173] train loss: 4.562924, tar: 0.465061 
l0: 0.211536, l1: 0.214680, l2: 0.212824, l3: 0.224754, l4: 0.252210, l5: 0.418662, l6: 0.488567

[epoch:  57/100, batch:   636/  792, ite: 60174] train loss: 4.551394, tar: 0.46

[epoch:  57/100, batch:   716/  792, ite: 60214] train loss: 4.626619, tar: 0.474004 
l0: 0.852986, l1: 0.859016, l2: 0.866630, l3: 0.878994, l4: 0.892076, l5: 0.914267, l6: 0.930722

[epoch:  57/100, batch:   718/  792, ite: 60215] train loss: 4.638564, tar: 0.475767 
l0: 0.415146, l1: 0.416101, l2: 0.414739, l3: 0.417532, l4: 0.438119, l5: 0.475873, l6: 0.551678

[epoch:  57/100, batch:   720/  792, ite: 60216] train loss: 4.634514, tar: 0.475486 
l0: 0.841133, l1: 0.848088, l2: 0.845981, l3: 0.839508, l4: 0.871140, l5: 0.945182, l6: 1.168224

[epoch:  57/100, batch:   722/  792, ite: 60217] train loss: 4.647661, tar: 0.477171 
l0: 0.667827, l1: 0.666551, l2: 0.668308, l3: 0.666830, l4: 0.698897, l5: 0.838426, l6: 1.017760

[epoch:  57/100, batch:   724/  792, ite: 60218] train loss: 4.655547, tar: 0.478046 
l0: 0.319446, l1: 0.325569, l2: 0.326672, l3: 0.331505, l4: 0.327261, l5: 0.386678, l6: 0.504756

[epoch:  57/100, batch:   726/  792, ite: 60219] train loss: 4.648199, tar: 0.47

l0: 0.376880, l1: 0.382712, l2: 0.382560, l3: 0.381656, l4: 0.388774, l5: 0.540559, l6: 0.705584

[epoch:  58/100, batch:    14/  792, ite: 60259] train loss: 4.629661, tar: 0.476773 
l0: 0.585926, l1: 0.580738, l2: 0.571599, l3: 0.567533, l4: 0.575851, l5: 0.659650, l6: 0.675838

[epoch:  58/100, batch:    16/  792, ite: 60260] train loss: 4.630735, tar: 0.477193 
l0: 0.504163, l1: 0.505062, l2: 0.503195, l3: 0.509885, l4: 0.556910, l5: 0.630671, l6: 0.877151

[epoch:  58/100, batch:    18/  792, ite: 60261] train loss: 4.632290, tar: 0.477296 
l0: 0.731138, l1: 0.734324, l2: 0.734387, l3: 0.735395, l4: 0.779467, l5: 0.958141, l6: 1.197118

[epoch:  58/100, batch:    20/  792, ite: 60262] train loss: 4.641661, tar: 0.478265 
l0: 0.273323, l1: 0.274111, l2: 0.270070, l3: 0.272073, l4: 0.290601, l5: 0.352907, l6: 0.435999

[epoch:  58/100, batch:    22/  792, ite: 60263] train loss: 4.634354, tar: 0.477486 
l0: 0.372842, l1: 0.379516, l2: 0.379011, l3: 0.385093, l4: 0.439932, l5: 0.5442

[epoch:  58/100, batch:   102/  792, ite: 60303] train loss: 4.594618, tar: 0.470796 
l0: 0.476645, l1: 0.478068, l2: 0.483823, l3: 0.494900, l4: 0.552707, l5: 0.680574, l6: 0.790065

[epoch:  58/100, batch:   104/  792, ite: 60304] train loss: 4.595192, tar: 0.470815 
l0: 1.043751, l1: 1.069197, l2: 1.085791, l3: 1.114725, l4: 1.201962, l5: 1.445524, l6: 1.625273

[epoch:  58/100, batch:   106/  792, ite: 60305] train loss: 4.614883, tar: 0.472694 
l0: 0.523204, l1: 0.525270, l2: 0.527042, l3: 0.529995, l4: 0.535635, l5: 0.583656, l6: 0.683031

[epoch:  58/100, batch:   108/  792, ite: 60306] train loss: 4.615127, tar: 0.472859 
l0: 0.380474, l1: 0.388510, l2: 0.386836, l3: 0.388403, l4: 0.395086, l5: 0.470120, l6: 0.573689

[epoch:  58/100, batch:   110/  792, ite: 60307] train loss: 4.611886, tar: 0.472558 
l0: 0.261276, l1: 0.260752, l2: 0.261958, l3: 0.267880, l4: 0.310785, l5: 0.424840, l6: 0.617184

[epoch:  58/100, batch:   112/  792, ite: 60308] train loss: 4.606921, tar: 0.47

[epoch:  58/100, batch:   192/  792, ite: 60348] train loss: 4.601095, tar: 0.471482 
l0: 0.723610, l1: 0.730927, l2: 0.730723, l3: 0.735193, l4: 0.736198, l5: 0.880460, l6: 1.027474

[epoch:  58/100, batch:   194/  792, ite: 60349] train loss: 4.607128, tar: 0.472204 
l0: 0.716555, l1: 0.716366, l2: 0.715968, l3: 0.715402, l4: 0.755054, l5: 0.788357, l6: 1.015038

[epoch:  58/100, batch:   196/  792, ite: 60350] train loss: 4.612474, tar: 0.472902 
l0: 0.322921, l1: 0.321740, l2: 0.321718, l3: 0.329790, l4: 0.357743, l5: 0.450638, l6: 0.719371

[epoch:  58/100, batch:   198/  792, ite: 60351] train loss: 4.609563, tar: 0.472475 
l0: 0.406734, l1: 0.409048, l2: 0.406117, l3: 0.411625, l4: 0.430238, l5: 0.541577, l6: 0.626675

[epoch:  58/100, batch:   200/  792, ite: 60352] train loss: 4.607476, tar: 0.472288 
l0: 0.281815, l1: 0.272500, l2: 0.272348, l3: 0.286424, l4: 0.292363, l5: 0.391731, l6: 0.545135

[epoch:  58/100, batch:   202/  792, ite: 60353] train loss: 4.602773, tar: 0.47

[epoch:  58/100, batch:   282/  792, ite: 60393] train loss: 4.588504, tar: 0.470465 
l0: 0.502719, l1: 0.508558, l2: 0.514173, l3: 0.517347, l4: 0.553585, l5: 0.603486, l6: 0.760470

[epoch:  58/100, batch:   284/  792, ite: 60394] train loss: 4.589111, tar: 0.470547 
l0: 0.607437, l1: 0.609890, l2: 0.606920, l3: 0.612939, l4: 0.651126, l5: 0.723223, l6: 0.934872

[epoch:  58/100, batch:   286/  792, ite: 60395] train loss: 4.591983, tar: 0.470893 
l0: 0.591903, l1: 0.592027, l2: 0.590488, l3: 0.594623, l4: 0.625841, l5: 0.725490, l6: 0.890772

[epoch:  58/100, batch:   288/  792, ite: 60396] train loss: 4.594799, tar: 0.471199 
l0: 0.319380, l1: 0.320518, l2: 0.320236, l3: 0.324266, l4: 0.347693, l5: 0.417002, l6: 0.573350

[epoch:  58/100, batch:   290/  792, ite: 60397] train loss: 4.591361, tar: 0.470816 
l0: 0.382563, l1: 0.385891, l2: 0.384784, l3: 0.385570, l4: 0.394574, l5: 0.438477, l6: 0.569471

[epoch:  58/100, batch:   292/  792, ite: 60398] train loss: 4.588817, tar: 0.47

[epoch:  58/100, batch:   372/  792, ite: 60438] train loss: 4.547140, tar: 0.465090 
l0: 0.619205, l1: 0.633074, l2: 0.633438, l3: 0.638164, l4: 0.645222, l5: 0.716832, l6: 0.797834

[epoch:  58/100, batch:   374/  792, ite: 60439] train loss: 4.549557, tar: 0.465441 
l0: 0.384414, l1: 0.385974, l2: 0.389310, l3: 0.394139, l4: 0.429930, l5: 0.528182, l6: 0.663509

[epoch:  58/100, batch:   376/  792, ite: 60440] train loss: 4.548088, tar: 0.465257 
l0: 0.542514, l1: 0.541694, l2: 0.545401, l3: 0.552514, l4: 0.563604, l5: 0.626606, l6: 0.852473

[epoch:  58/100, batch:   378/  792, ite: 60441] train loss: 4.549479, tar: 0.465432 
l0: 0.268711, l1: 0.274301, l2: 0.274123, l3: 0.279732, l4: 0.319746, l5: 0.442239, l6: 0.546314

[epoch:  58/100, batch:   380/  792, ite: 60442] train loss: 4.545955, tar: 0.464987 
l0: 0.567214, l1: 0.574663, l2: 0.573929, l3: 0.575422, l4: 0.570113, l5: 0.618946, l6: 0.734259

[epoch:  58/100, batch:   382/  792, ite: 60443] train loss: 4.546853, tar: 0.46

[epoch:  58/100, batch:   462/  792, ite: 60483] train loss: 4.546607, tar: 0.465218 
l0: 0.557272, l1: 0.558857, l2: 0.559523, l3: 0.560106, l4: 0.585416, l5: 0.656686, l6: 0.816143

[epoch:  58/100, batch:   464/  792, ite: 60484] train loss: 4.547849, tar: 0.465409 
l0: 0.670785, l1: 0.679906, l2: 0.680193, l3: 0.700083, l4: 0.732798, l5: 0.885639, l6: 1.288340

[epoch:  58/100, batch:   466/  792, ite: 60485] train loss: 4.553130, tar: 0.465832 
l0: 0.232314, l1: 0.233863, l2: 0.234622, l3: 0.239617, l4: 0.256832, l5: 0.307442, l6: 0.423154

[epoch:  58/100, batch:   468/  792, ite: 60486] train loss: 4.548701, tar: 0.465352 
l0: 0.773791, l1: 0.784845, l2: 0.788115, l3: 0.777947, l4: 0.801452, l5: 0.832620, l6: 0.979017

[epoch:  58/100, batch:   470/  792, ite: 60487] train loss: 4.553312, tar: 0.465985 
l0: 0.599011, l1: 0.605432, l2: 0.607259, l3: 0.606464, l4: 0.613861, l5: 0.656877, l6: 0.699937

[epoch:  58/100, batch:   472/  792, ite: 60488] train loss: 4.554405, tar: 0.46

[epoch:  58/100, batch:   552/  792, ite: 60528] train loss: 4.548087, tar: 0.465994 
l0: 0.315825, l1: 0.319558, l2: 0.321073, l3: 0.327405, l4: 0.340193, l5: 0.402422, l6: 0.561045

[epoch:  58/100, batch:   554/  792, ite: 60529] train loss: 4.545675, tar: 0.465710 
l0: 0.679857, l1: 0.688082, l2: 0.689531, l3: 0.689144, l4: 0.702443, l5: 0.737100, l6: 0.873668

[epoch:  58/100, batch:   556/  792, ite: 60530] train loss: 4.548428, tar: 0.466114 
l0: 0.295888, l1: 0.298537, l2: 0.299247, l3: 0.291042, l4: 0.284937, l5: 0.438440, l6: 0.613656

[epoch:  58/100, batch:   558/  792, ite: 60531] train loss: 4.546101, tar: 0.465794 
l0: 0.422008, l1: 0.427496, l2: 0.431305, l3: 0.411989, l4: 0.424271, l5: 0.521796, l6: 0.629322

[epoch:  58/100, batch:   560/  792, ite: 60532] train loss: 4.544901, tar: 0.465711 
l0: 0.391208, l1: 0.391833, l2: 0.393166, l3: 0.392887, l4: 0.402118, l5: 0.422819, l6: 0.500286

[epoch:  58/100, batch:   562/  792, ite: 60533] train loss: 4.542924, tar: 0.46

[epoch:  58/100, batch:   642/  792, ite: 60573] train loss: 4.557910, tar: 0.467377 
l0: 0.325222, l1: 0.326304, l2: 0.328637, l3: 0.335818, l4: 0.354405, l5: 0.523956, l6: 0.660736

[epoch:  58/100, batch:   644/  792, ite: 60574] train loss: 4.556262, tar: 0.467130 
l0: 0.425775, l1: 0.415864, l2: 0.418690, l3: 0.411978, l4: 0.419681, l5: 0.480225, l6: 0.660384

[epoch:  58/100, batch:   646/  792, ite: 60575] train loss: 4.555154, tar: 0.467058 
l0: 0.466150, l1: 0.466350, l2: 0.467218, l3: 0.466886, l4: 0.487976, l5: 0.629388, l6: 0.868942

[epoch:  58/100, batch:   648/  792, ite: 60576] train loss: 4.555316, tar: 0.467056 
l0: 0.357454, l1: 0.373572, l2: 0.372731, l3: 0.380617, l4: 0.426659, l5: 0.523379, l6: 0.639383

[epoch:  58/100, batch:   650/  792, ite: 60577] train loss: 4.553915, tar: 0.466866 
l0: 0.412468, l1: 0.417388, l2: 0.419570, l3: 0.440565, l4: 0.458275, l5: 0.533423, l6: 0.628325

[epoch:  58/100, batch:   652/  792, ite: 60578] train loss: 4.553035, tar: 0.46

[epoch:  58/100, batch:   732/  792, ite: 60618] train loss: 4.557848, tar: 0.467785 
l0: 0.398273, l1: 0.400202, l2: 0.400552, l3: 0.403434, l4: 0.418493, l5: 0.532505, l6: 0.659403

[epoch:  58/100, batch:   734/  792, ite: 60619] train loss: 4.556771, tar: 0.467672 
l0: 0.373371, l1: 0.376764, l2: 0.372567, l3: 0.382923, l4: 0.422014, l5: 0.507294, l6: 0.745674

[epoch:  58/100, batch:   736/  792, ite: 60620] train loss: 4.555730, tar: 0.467520 
l0: 0.304386, l1: 0.306791, l2: 0.307441, l3: 0.313870, l4: 0.359022, l5: 0.503514, l6: 0.647651

[epoch:  58/100, batch:   738/  792, ite: 60621] train loss: 4.554094, tar: 0.467258 
l0: 0.266909, l1: 0.273195, l2: 0.276651, l3: 0.278289, l4: 0.310204, l5: 0.380247, l6: 0.525320

[epoch:  58/100, batch:   740/  792, ite: 60622] train loss: 4.551312, tar: 0.466935 
l0: 0.425971, l1: 0.432204, l2: 0.430530, l3: 0.430391, l4: 0.442924, l5: 0.543064, l6: 0.642262

[epoch:  58/100, batch:   742/  792, ite: 60623] train loss: 4.550489, tar: 0.46

l0: 0.475619, l1: 0.476678, l2: 0.477735, l3: 0.479975, l4: 0.525796, l5: 0.538072, l6: 0.745743

[epoch:  59/100, batch:    30/  792, ite: 60663] train loss: 4.565651, tar: 0.468822 
l0: 0.274479, l1: 0.274982, l2: 0.272131, l3: 0.282419, l4: 0.319679, l5: 0.438655, l6: 0.567161

[epoch:  59/100, batch:    32/  792, ite: 60664] train loss: 4.563364, tar: 0.468530 
l0: 0.756911, l1: 0.757493, l2: 0.757301, l3: 0.763353, l4: 0.802684, l5: 0.907043, l6: 0.869408

[epoch:  59/100, batch:    34/  792, ite: 60665] train loss: 4.566143, tar: 0.468963 
l0: 0.310680, l1: 0.311407, l2: 0.309561, l3: 0.315717, l4: 0.346601, l5: 0.395544, l6: 0.506640

[epoch:  59/100, batch:    36/  792, ite: 60666] train loss: 4.563880, tar: 0.468726 
l0: 0.323963, l1: 0.325432, l2: 0.328642, l3: 0.329017, l4: 0.340407, l5: 0.399628, l6: 0.482351

[epoch:  59/100, batch:    38/  792, ite: 60667] train loss: 4.561635, tar: 0.468509 
l0: 0.510764, l1: 0.515271, l2: 0.516992, l3: 0.522662, l4: 0.570764, l5: 0.6954

[epoch:  59/100, batch:   118/  792, ite: 60707] train loss: 4.555473, tar: 0.467924 
l0: 0.462119, l1: 0.462374, l2: 0.459914, l3: 0.455447, l4: 0.485390, l5: 0.601170, l6: 0.859600

[epoch:  59/100, batch:   120/  792, ite: 60708] train loss: 4.555606, tar: 0.467916 
l0: 0.386353, l1: 0.385473, l2: 0.388588, l3: 0.396370, l4: 0.413937, l5: 0.540214, l6: 0.625390

[epoch:  59/100, batch:   122/  792, ite: 60709] train loss: 4.554654, tar: 0.467801 
l0: 0.520678, l1: 0.524506, l2: 0.526558, l3: 0.526160, l4: 0.554768, l5: 0.581439, l6: 0.729292

[epoch:  59/100, batch:   124/  792, ite: 60710] train loss: 4.554936, tar: 0.467875 
l0: 0.182117, l1: 0.189188, l2: 0.189772, l3: 0.205253, l4: 0.234599, l5: 0.371616, l6: 0.372503

[epoch:  59/100, batch:   126/  792, ite: 60711] train loss: 4.551607, tar: 0.467473 
l0: 0.187381, l1: 0.188474, l2: 0.187812, l3: 0.200594, l4: 0.236514, l5: 0.312853, l6: 0.441741

[epoch:  59/100, batch:   128/  792, ite: 60712] train loss: 4.548319, tar: 0.46

[epoch:  59/100, batch:   208/  792, ite: 60752] train loss: 4.538682, tar: 0.466264 
l0: 0.348803, l1: 0.349638, l2: 0.349977, l3: 0.356172, l4: 0.384530, l5: 0.464603, l6: 0.610277

[epoch:  59/100, batch:   210/  792, ite: 60753] train loss: 4.537407, tar: 0.466108 
l0: 0.515856, l1: 0.514283, l2: 0.511607, l3: 0.516143, l4: 0.561324, l5: 0.684336, l6: 0.981196

[epoch:  59/100, batch:   212/  792, ite: 60754] train loss: 4.538548, tar: 0.466174 
l0: 0.311090, l1: 0.310020, l2: 0.310531, l3: 0.308876, l4: 0.345327, l5: 0.487615, l6: 0.669754

[epoch:  59/100, batch:   214/  792, ite: 60755] train loss: 4.537077, tar: 0.465969 
l0: 0.338971, l1: 0.338419, l2: 0.338706, l3: 0.347086, l4: 0.380321, l5: 0.484670, l6: 0.663221

[epoch:  59/100, batch:   216/  792, ite: 60756] train loss: 4.535827, tar: 0.465801 
l0: 0.612473, l1: 0.619054, l2: 0.609433, l3: 0.610097, l4: 0.672722, l5: 0.802436, l6: 0.932988

[epoch:  59/100, batch:   218/  792, ite: 60757] train loss: 4.537621, tar: 0.46

[epoch:  59/100, batch:   298/  792, ite: 60797] train loss: 4.541913, tar: 0.466546 
l0: 0.532854, l1: 0.533018, l2: 0.540107, l3: 0.538522, l4: 0.549684, l5: 0.634777, l6: 0.780442

[epoch:  59/100, batch:   300/  792, ite: 60798] train loss: 4.542502, tar: 0.466629 
l0: 0.748529, l1: 0.742034, l2: 0.740427, l3: 0.738192, l4: 0.770601, l5: 0.740831, l6: 0.748388

[epoch:  59/100, batch:   302/  792, ite: 60799] train loss: 4.544320, tar: 0.466982 
l0: 0.296310, l1: 0.290577, l2: 0.291859, l3: 0.301447, l4: 0.332851, l5: 0.408698, l6: 0.654079

[epoch:  59/100, batch:   304/  792, ite: 60800] train loss: 4.542834, tar: 0.466769 
l0: 0.379498, l1: 0.376985, l2: 0.376843, l3: 0.384437, l4: 0.409540, l5: 0.450981, l6: 0.610168

[epoch:  59/100, batch:   306/  792, ite: 60801] train loss: 4.541763, tar: 0.466660 
l0: 0.401923, l1: 0.407753, l2: 0.409527, l3: 0.406246, l4: 0.426307, l5: 0.614876, l6: 0.752569

[epoch:  59/100, batch:   308/  792, ite: 60802] train loss: 4.541421, tar: 0.46

[epoch:  59/100, batch:   388/  792, ite: 60842] train loss: 4.555252, tar: 0.468684 
l0: 0.718720, l1: 0.724773, l2: 0.721881, l3: 0.709006, l4: 0.718645, l5: 0.700935, l6: 0.859986

[epoch:  59/100, batch:   390/  792, ite: 60843] train loss: 4.556961, tar: 0.468981 
l0: 0.686906, l1: 0.680866, l2: 0.682695, l3: 0.679094, l4: 0.709912, l5: 0.742462, l6: 0.854818

[epoch:  59/100, batch:   392/  792, ite: 60844] train loss: 4.558514, tar: 0.469239 
l0: 0.405246, l1: 0.414623, l2: 0.414252, l3: 0.417020, l4: 0.426106, l5: 0.487187, l6: 0.648708

[epoch:  59/100, batch:   394/  792, ite: 60845] train loss: 4.557780, tar: 0.469163 
l0: 0.663178, l1: 0.668825, l2: 0.672976, l3: 0.679406, l4: 0.699234, l5: 0.819270, l6: 0.992905

[epoch:  59/100, batch:   396/  792, ite: 60846] train loss: 4.559677, tar: 0.469393 
l0: 0.207934, l1: 0.218367, l2: 0.221673, l3: 0.237050, l4: 0.270476, l5: 0.381248, l6: 0.531813

[epoch:  59/100, batch:   398/  792, ite: 60847] train loss: 4.557503, tar: 0.46

[epoch:  59/100, batch:   478/  792, ite: 60887] train loss: 4.552269, tar: 0.468881 
l0: 0.399239, l1: 0.405020, l2: 0.402084, l3: 0.408017, l4: 0.442669, l5: 0.426208, l6: 0.492124

[epoch:  59/100, batch:   480/  792, ite: 60888] train loss: 4.551207, tar: 0.468803 
l0: 0.339977, l1: 0.341928, l2: 0.344942, l3: 0.348186, l4: 0.371362, l5: 0.478377, l6: 0.673664

[epoch:  59/100, batch:   482/  792, ite: 60889] train loss: 4.550164, tar: 0.468658 
l0: 0.625890, l1: 0.626717, l2: 0.628953, l3: 0.633567, l4: 0.648763, l5: 0.680704, l6: 0.964199

[epoch:  59/100, batch:   484/  792, ite: 60890] train loss: 4.551564, tar: 0.468835 
l0: 0.455364, l1: 0.450511, l2: 0.451328, l3: 0.461315, l4: 0.503769, l5: 0.600555, l6: 0.728948

[epoch:  59/100, batch:   486/  792, ite: 60891] train loss: 4.551464, tar: 0.468820 
l0: 0.389619, l1: 0.391351, l2: 0.389779, l3: 0.394361, l4: 0.412579, l5: 0.469901, l6: 0.567466

[epoch:  59/100, batch:   488/  792, ite: 60892] train loss: 4.550404, tar: 0.46

[epoch:  59/100, batch:   568/  792, ite: 60932] train loss: 4.541235, tar: 0.467532 
l0: 0.838970, l1: 0.834862, l2: 0.827412, l3: 0.826932, l4: 0.853770, l5: 0.833890, l6: 0.850888

[epoch:  59/100, batch:   570/  792, ite: 60933] train loss: 4.543522, tar: 0.467930 
l0: 0.502461, l1: 0.504687, l2: 0.502209, l3: 0.504320, l4: 0.516872, l5: 0.598989, l6: 0.738255

[epoch:  59/100, batch:   572/  792, ite: 60934] train loss: 4.543653, tar: 0.467967 
l0: 0.499644, l1: 0.503694, l2: 0.502644, l3: 0.506489, l4: 0.535931, l5: 0.650076, l6: 0.856403

[epoch:  59/100, batch:   574/  792, ite: 60935] train loss: 4.544073, tar: 0.468001 
l0: 0.478008, l1: 0.478900, l2: 0.478936, l3: 0.484179, l4: 0.506889, l5: 0.615009, l6: 0.769333

[epoch:  59/100, batch:   576/  792, ite: 60936] train loss: 4.544221, tar: 0.468012 
l0: 0.237277, l1: 0.240156, l2: 0.239634, l3: 0.244043, l4: 0.264719, l5: 0.355866, l6: 0.474618

[epoch:  59/100, batch:   578/  792, ite: 60937] train loss: 4.542111, tar: 0.46

[epoch:  59/100, batch:   658/  792, ite: 60977] train loss: 4.544174, tar: 0.468025 
l0: 0.303931, l1: 0.306365, l2: 0.306904, l3: 0.309552, l4: 0.357766, l5: 0.526555, l6: 0.666685

[epoch:  59/100, batch:   660/  792, ite: 60978] train loss: 4.543111, tar: 0.467858 
l0: 0.601620, l1: 0.605749, l2: 0.602564, l3: 0.614949, l4: 0.652442, l5: 0.716293, l6: 0.733824

[epoch:  59/100, batch:   662/  792, ite: 60979] train loss: 4.543943, tar: 0.467994 
l0: 0.447412, l1: 0.452206, l2: 0.446860, l3: 0.440850, l4: 0.459036, l5: 0.527261, l6: 0.618399

[epoch:  59/100, batch:   664/  792, ite: 60980] train loss: 4.543438, tar: 0.467973 
l0: 0.461638, l1: 0.473595, l2: 0.472890, l3: 0.471000, l4: 0.480019, l5: 0.593461, l6: 0.748770

[epoch:  59/100, batch:   666/  792, ite: 60981] train loss: 4.543400, tar: 0.467967 
l0: 0.461026, l1: 0.457197, l2: 0.457864, l3: 0.462846, l4: 0.474441, l5: 0.556732, l6: 0.628507

[epoch:  59/100, batch:   668/  792, ite: 60982] train loss: 4.543059, tar: 0.46

[epoch:  59/100, batch:   748/  792, ite: 61022] train loss: 4.556443, tar: 0.469461 
l0: 0.315339, l1: 0.319145, l2: 0.316648, l3: 0.313510, l4: 0.335071, l5: 0.379940, l6: 0.480038

[epoch:  59/100, batch:   750/  792, ite: 61023] train loss: 4.554842, tar: 0.469310 
l0: 0.383634, l1: 0.384320, l2: 0.384840, l3: 0.381029, l4: 0.432173, l5: 0.558236, l6: 0.745595

[epoch:  59/100, batch:   752/  792, ite: 61024] train loss: 4.554366, tar: 0.469226 
l0: 0.678156, l1: 0.683474, l2: 0.685049, l3: 0.692701, l4: 0.715450, l5: 0.923041, l6: 1.157747

[epoch:  59/100, batch:   754/  792, ite: 61025] train loss: 4.556602, tar: 0.469430 
l0: 0.518174, l1: 0.519630, l2: 0.519276, l3: 0.524265, l4: 0.517731, l5: 0.613737, l6: 0.819109

[epoch:  59/100, batch:   756/  792, ite: 61026] train loss: 4.556933, tar: 0.469478 
l0: 0.537161, l1: 0.537775, l2: 0.536979, l3: 0.531824, l4: 0.542414, l5: 0.633068, l6: 0.737597

[epoch:  59/100, batch:   758/  792, ite: 61027] train loss: 4.557305, tar: 0.46

l0: 0.741908, l1: 0.742092, l2: 0.739137, l3: 0.738315, l4: 0.779257, l5: 0.757597, l6: 0.843761

[epoch:  60/100, batch:    46/  792, ite: 61067] train loss: 4.549309, tar: 0.468376 
l0: 0.331786, l1: 0.331804, l2: 0.329880, l3: 0.338233, l4: 0.358424, l5: 0.429336, l6: 0.502387

[epoch:  60/100, batch:    48/  792, ite: 61068] train loss: 4.548051, tar: 0.468248 
l0: 0.658876, l1: 0.658070, l2: 0.657725, l3: 0.643824, l4: 0.673327, l5: 0.769069, l6: 0.953109

[epoch:  60/100, batch:    50/  792, ite: 61069] train loss: 4.549498, tar: 0.468426 
l0: 0.509587, l1: 0.507059, l2: 0.507070, l3: 0.515931, l4: 0.541135, l5: 0.698702, l6: 0.948617

[epoch:  60/100, batch:    52/  792, ite: 61070] train loss: 4.550166, tar: 0.468465 
l0: 0.334560, l1: 0.334159, l2: 0.331292, l3: 0.336200, l4: 0.360876, l5: 0.481937, l6: 0.623586

[epoch:  60/100, batch:    54/  792, ite: 61071] train loss: 4.549122, tar: 0.468340 
l0: 0.456555, l1: 0.463659, l2: 0.465073, l3: 0.463618, l4: 0.476381, l5: 0.5698

[epoch:  60/100, batch:   134/  792, ite: 61111] train loss: 4.539922, tar: 0.467055 
l0: 0.362648, l1: 0.364826, l2: 0.364208, l3: 0.364280, l4: 0.410489, l5: 0.566644, l6: 0.659285

[epoch:  60/100, batch:   136/  792, ite: 61112] train loss: 4.539361, tar: 0.466961 
l0: 0.371442, l1: 0.374067, l2: 0.373858, l3: 0.388409, l4: 0.445391, l5: 0.532988, l6: 0.663310

[epoch:  60/100, batch:   138/  792, ite: 61113] train loss: 4.538757, tar: 0.466875 
l0: 0.525293, l1: 0.540498, l2: 0.546241, l3: 0.554092, l4: 0.577816, l5: 0.658905, l6: 0.783849

[epoch:  60/100, batch:   140/  792, ite: 61114] train loss: 4.539250, tar: 0.466928 
l0: 0.252609, l1: 0.251021, l2: 0.254247, l3: 0.252786, l4: 0.261611, l5: 0.351848, l6: 0.506170

[epoch:  60/100, batch:   142/  792, ite: 61115] train loss: 4.537547, tar: 0.466736 
l0: 0.397670, l1: 0.398568, l2: 0.396135, l3: 0.395438, l4: 0.416455, l5: 0.486720, l6: 0.645265

[epoch:  60/100, batch:   144/  792, ite: 61116] train loss: 4.536920, tar: 0.46

[epoch:  60/100, batch:   224/  792, ite: 61156] train loss: 4.522358, tar: 0.464836 
l0: 0.574921, l1: 0.573928, l2: 0.577676, l3: 0.574435, l4: 0.576316, l5: 0.709672, l6: 0.781350

[epoch:  60/100, batch:   226/  792, ite: 61157] train loss: 4.523006, tar: 0.464931 
l0: 0.741118, l1: 0.744671, l2: 0.731854, l3: 0.709747, l4: 0.689523, l5: 0.686707, l6: 0.700939

[epoch:  60/100, batch:   228/  792, ite: 61158] train loss: 4.524066, tar: 0.465170 
l0: 0.562956, l1: 0.559915, l2: 0.561281, l3: 0.551961, l4: 0.543507, l5: 0.676376, l6: 0.846293

[epoch:  60/100, batch:   230/  792, ite: 61159] train loss: 4.524757, tar: 0.465254 
l0: 0.376598, l1: 0.383205, l2: 0.384571, l3: 0.389951, l4: 0.412195, l5: 0.484438, l6: 0.530088

[epoch:  60/100, batch:   232/  792, ite: 61160] train loss: 4.523902, tar: 0.465178 
l0: 0.286065, l1: 0.282370, l2: 0.282669, l3: 0.286988, l4: 0.316012, l5: 0.440510, l6: 0.568045

[epoch:  60/100, batch:   234/  792, ite: 61161] train loss: 4.522735, tar: 0.46

[epoch:  60/100, batch:   314/  792, ite: 61201] train loss: 4.511823, tar: 0.463534 
l0: 0.722097, l1: 0.726246, l2: 0.728121, l3: 0.728227, l4: 0.798232, l5: 0.875713, l6: 1.029749

[epoch:  60/100, batch:   316/  792, ite: 61202] train loss: 4.513677, tar: 0.463749 
l0: 0.503499, l1: 0.507124, l2: 0.510983, l3: 0.516274, l4: 0.525953, l5: 0.643748, l6: 0.748815

[epoch:  60/100, batch:   318/  792, ite: 61203] train loss: 4.513961, tar: 0.463782 
l0: 0.377406, l1: 0.374948, l2: 0.374986, l3: 0.396227, l4: 0.419814, l5: 0.499527, l6: 0.621822

[epoch:  60/100, batch:   320/  792, ite: 61204] train loss: 4.513270, tar: 0.463710 
l0: 0.592483, l1: 0.607602, l2: 0.605616, l3: 0.614815, l4: 0.643841, l5: 0.698975, l6: 0.876630

[epoch:  60/100, batch:   322/  792, ite: 61205] train loss: 4.514161, tar: 0.463817 
l0: 0.936590, l1: 0.942657, l2: 0.945249, l3: 0.945821, l4: 0.953585, l5: 0.952841, l6: 1.081678

[epoch:  60/100, batch:   324/  792, ite: 61206] train loss: 4.516934, tar: 0.46

[epoch:  60/100, batch:   404/  792, ite: 61246] train loss: 4.528566, tar: 0.465704 
l0: 0.432227, l1: 0.436452, l2: 0.432474, l3: 0.437419, l4: 0.464165, l5: 0.568144, l6: 0.695675

[epoch:  60/100, batch:   406/  792, ite: 61247] train loss: 4.528310, tar: 0.465677 
l0: 0.351587, l1: 0.358474, l2: 0.357216, l3: 0.365747, l4: 0.400407, l5: 0.598613, l6: 0.763408

[epoch:  60/100, batch:   408/  792, ite: 61248] train loss: 4.527859, tar: 0.465586 
l0: 0.418756, l1: 0.415108, l2: 0.412714, l3: 0.422151, l4: 0.454016, l5: 0.528845, l6: 0.630752

[epoch:  60/100, batch:   410/  792, ite: 61249] train loss: 4.527384, tar: 0.465548 
l0: 0.235076, l1: 0.235986, l2: 0.234295, l3: 0.239010, l4: 0.257849, l5: 0.331979, l6: 0.569397

[epoch:  60/100, batch:   412/  792, ite: 61250] train loss: 4.525905, tar: 0.465364 
l0: 0.463906, l1: 0.467898, l2: 0.468155, l3: 0.471552, l4: 0.490493, l5: 0.554997, l6: 0.734924

[epoch:  60/100, batch:   414/  792, ite: 61251] train loss: 4.525891, tar: 0.46

[epoch:  60/100, batch:   494/  792, ite: 61291] train loss: 4.528328, tar: 0.465600 
l0: 0.363876, l1: 0.365099, l2: 0.368594, l3: 0.376796, l4: 0.413031, l5: 0.520266, l6: 0.715386

[epoch:  60/100, batch:   496/  792, ite: 61292] train loss: 4.527780, tar: 0.465522 
l0: 0.384735, l1: 0.379869, l2: 0.384265, l3: 0.387091, l4: 0.434826, l5: 0.542434, l6: 0.822474

[epoch:  60/100, batch:   498/  792, ite: 61293] train loss: 4.527525, tar: 0.465459 
l0: 0.308521, l1: 0.312496, l2: 0.311997, l3: 0.326135, l4: 0.398334, l5: 0.538017, l6: 0.621244

[epoch:  60/100, batch:   500/  792, ite: 61294] train loss: 4.526718, tar: 0.465338 
l0: 0.311045, l1: 0.316389, l2: 0.316655, l3: 0.326916, l4: 0.335848, l5: 0.389774, l6: 0.392761

[epoch:  60/100, batch:   502/  792, ite: 61295] train loss: 4.525434, tar: 0.465219 
l0: 0.424735, l1: 0.426080, l2: 0.424480, l3: 0.420940, l4: 0.452551, l5: 0.507431, l6: 0.695738

[epoch:  60/100, batch:   504/  792, ite: 61296] train loss: 4.525165, tar: 0.46

[epoch:  60/100, batch:   584/  792, ite: 61336] train loss: 4.540703, tar: 0.467340 
l0: 0.354299, l1: 0.351481, l2: 0.354378, l3: 0.353536, l4: 0.373832, l5: 0.480230, l6: 0.514263

[epoch:  60/100, batch:   586/  792, ite: 61337] train loss: 4.539879, tar: 0.467255 
l0: 0.310384, l1: 0.311906, l2: 0.311108, l3: 0.310254, l4: 0.344566, l5: 0.444203, l6: 0.552299

[epoch:  60/100, batch:   588/  792, ite: 61338] train loss: 4.538844, tar: 0.467138 
l0: 0.430956, l1: 0.432095, l2: 0.430755, l3: 0.433561, l4: 0.452857, l5: 0.498113, l6: 0.601807

[epoch:  60/100, batch:   590/  792, ite: 61339] train loss: 4.538446, tar: 0.467111 
l0: 0.643767, l1: 0.655170, l2: 0.652249, l3: 0.664588, l4: 0.675158, l5: 0.815143, l6: 1.083726

[epoch:  60/100, batch:   592/  792, ite: 61340] train loss: 4.539813, tar: 0.467243 
l0: 0.404807, l1: 0.407205, l2: 0.408519, l3: 0.414782, l4: 0.420931, l5: 0.473593, l6: 0.806033

[epoch:  60/100, batch:   594/  792, ite: 61341] train loss: 4.539586, tar: 0.46

[epoch:  60/100, batch:   674/  792, ite: 61381] train loss: 4.543701, tar: 0.467652 
l0: 0.575393, l1: 0.589149, l2: 0.575232, l3: 0.562610, l4: 0.579960, l5: 0.703583, l6: 0.854665

[epoch:  60/100, batch:   676/  792, ite: 61382] train loss: 4.544384, tar: 0.467730 
l0: 0.341160, l1: 0.342515, l2: 0.342177, l3: 0.346610, l4: 0.370683, l5: 0.507748, l6: 0.661517

[epoch:  60/100, batch:   678/  792, ite: 61383] train loss: 4.543690, tar: 0.467638 
l0: 0.272585, l1: 0.274243, l2: 0.275192, l3: 0.281347, l4: 0.308970, l5: 0.386179, l6: 0.531476

[epoch:  60/100, batch:   680/  792, ite: 61384] train loss: 4.542523, tar: 0.467497 
l0: 0.401381, l1: 0.406519, l2: 0.402822, l3: 0.414381, l4: 0.441470, l5: 0.661790, l6: 0.982808

[epoch:  60/100, batch:   682/  792, ite: 61385] train loss: 4.542663, tar: 0.467449 
l0: 0.606905, l1: 0.609498, l2: 0.609433, l3: 0.618533, l4: 0.637302, l5: 0.736469, l6: 0.944923

[epoch:  60/100, batch:   684/  792, ite: 61386] train loss: 4.543605, tar: 0.46

[epoch:  60/100, batch:   764/  792, ite: 61426] train loss: 4.544407, tar: 0.467664 
l0: 0.715876, l1: 0.719581, l2: 0.720742, l3: 0.714891, l4: 0.707219, l5: 0.722278, l6: 0.694615

[epoch:  60/100, batch:   766/  792, ite: 61427] train loss: 4.545259, tar: 0.467838 
l0: 0.681567, l1: 0.677937, l2: 0.678089, l3: 0.680426, l4: 0.723531, l5: 0.764841, l6: 1.148374

[epoch:  60/100, batch:   768/  792, ite: 61428] train loss: 4.546634, tar: 0.467987 
l0: 0.501212, l1: 0.500331, l2: 0.502512, l3: 0.505789, l4: 0.500164, l5: 0.610754, l6: 0.706975

[epoch:  60/100, batch:   770/  792, ite: 61429] train loss: 4.546638, tar: 0.468011 
l0: 0.279115, l1: 0.279074, l2: 0.281573, l3: 0.284595, l4: 0.308264, l5: 0.396432, l6: 0.583049

[epoch:  60/100, batch:   772/  792, ite: 61430] train loss: 4.545558, tar: 0.467879 
l0: 0.206349, l1: 0.210585, l2: 0.210643, l3: 0.220314, l4: 0.242442, l5: 0.367360, l6: 0.470029

[epoch:  60/100, batch:   774/  792, ite: 61431] train loss: 4.544100, tar: 0.46

l0: 0.755111, l1: 0.754963, l2: 0.755565, l3: 0.761423, l4: 0.748803, l5: 0.814836, l6: 1.307512

[epoch:  61/100, batch:    62/  792, ite: 61471] train loss: 4.549728, tar: 0.468257 
l0: 0.419917, l1: 0.424667, l2: 0.427321, l3: 0.420281, l4: 0.434771, l5: 0.587228, l6: 0.711375

[epoch:  61/100, batch:    64/  792, ite: 61472] train loss: 4.549431, tar: 0.468224 
l0: 0.930954, l1: 0.948600, l2: 0.943072, l3: 0.960940, l4: 0.968904, l5: 0.947440, l6: 1.083604

[epoch:  61/100, batch:    66/  792, ite: 61473] train loss: 4.551679, tar: 0.468538 
l0: 0.615703, l1: 0.619105, l2: 0.619803, l3: 0.621023, l4: 0.648181, l5: 0.715402, l6: 0.753254

[epoch:  61/100, batch:    68/  792, ite: 61474] train loss: 4.552239, tar: 0.468638 
l0: 0.424947, l1: 0.425165, l2: 0.427626, l3: 0.433245, l4: 0.452538, l5: 0.583068, l6: 0.792555

[epoch:  61/100, batch:    70/  792, ite: 61475] train loss: 4.552153, tar: 0.468608 
l0: 0.557746, l1: 0.557854, l2: 0.559957, l3: 0.564251, l4: 0.592069, l5: 0.6230

[epoch:  61/100, batch:   150/  792, ite: 61515] train loss: 4.555051, tar: 0.468997 
l0: 0.465060, l1: 0.472569, l2: 0.473447, l3: 0.476824, l4: 0.523375, l5: 0.663460, l6: 0.800591

[epoch:  61/100, batch:   152/  792, ite: 61516] train loss: 4.555293, tar: 0.468995 
l0: 0.314222, l1: 0.316407, l2: 0.313664, l3: 0.318428, l4: 0.340947, l5: 0.468717, l6: 0.640258

[epoch:  61/100, batch:   154/  792, ite: 61517] train loss: 4.554562, tar: 0.468893 
l0: 0.642038, l1: 0.645055, l2: 0.648210, l3: 0.652311, l4: 0.678037, l5: 0.802424, l6: 0.959788

[epoch:  61/100, batch:   156/  792, ite: 61518] train loss: 4.555552, tar: 0.469007 
l0: 0.955644, l1: 0.972184, l2: 0.968528, l3: 0.963727, l4: 1.007464, l5: 1.086353, l6: 1.214172

[epoch:  61/100, batch:   158/  792, ite: 61519] train loss: 4.558106, tar: 0.469327 
l0: 0.921268, l1: 0.923258, l2: 0.920298, l3: 0.928638, l4: 0.944851, l5: 0.932017, l6: 1.230076

[epoch:  61/100, batch:   160/  792, ite: 61520] train loss: 4.560453, tar: 0.46

[epoch:  61/100, batch:   240/  792, ite: 61560] train loss: 4.545340, tar: 0.467808 
l0: 0.352264, l1: 0.355430, l2: 0.356590, l3: 0.360825, l4: 0.362802, l5: 0.470505, l6: 0.632290

[epoch:  61/100, batch:   242/  792, ite: 61561] train loss: 4.544804, tar: 0.467734 
l0: 0.731497, l1: 0.736081, l2: 0.730943, l3: 0.726019, l4: 0.732292, l5: 0.774515, l6: 0.915083

[epoch:  61/100, batch:   244/  792, ite: 61562] train loss: 4.546005, tar: 0.467902 
l0: 0.666396, l1: 0.667779, l2: 0.670590, l3: 0.672862, l4: 0.696616, l5: 0.805046, l6: 0.960295

[epoch:  61/100, batch:   246/  792, ite: 61563] train loss: 4.547007, tar: 0.468029 
l0: 0.404678, l1: 0.398891, l2: 0.397217, l3: 0.396776, l4: 0.428196, l5: 0.554375, l6: 0.699478

[epoch:  61/100, batch:   248/  792, ite: 61564] train loss: 4.546710, tar: 0.467989 
l0: 0.308800, l1: 0.315334, l2: 0.314530, l3: 0.319561, l4: 0.328653, l5: 0.395210, l6: 0.578522

[epoch:  61/100, batch:   250/  792, ite: 61565] train loss: 4.545883, tar: 0.46

[epoch:  61/100, batch:   330/  792, ite: 61605] train loss: 4.542446, tar: 0.467485 
l0: 0.372028, l1: 0.381584, l2: 0.381626, l3: 0.382516, l4: 0.420306, l5: 0.549387, l6: 0.651504

[epoch:  61/100, batch:   332/  792, ite: 61606] train loss: 4.542030, tar: 0.467426 
l0: 0.595722, l1: 0.600075, l2: 0.601750, l3: 0.605871, l4: 0.630564, l5: 0.722142, l6: 0.820637

[epoch:  61/100, batch:   334/  792, ite: 61607] train loss: 4.542627, tar: 0.467506 
l0: 0.259490, l1: 0.271227, l2: 0.273200, l3: 0.277843, l4: 0.328217, l5: 0.470093, l6: 0.604902

[epoch:  61/100, batch:   336/  792, ite: 61608] train loss: 4.541720, tar: 0.467376 
l0: 0.442587, l1: 0.445990, l2: 0.448404, l3: 0.454163, l4: 0.462726, l5: 0.499896, l6: 0.651760

[epoch:  61/100, batch:   338/  792, ite: 61609] train loss: 4.541423, tar: 0.467361 
l0: 0.485954, l1: 0.493353, l2: 0.491197, l3: 0.495436, l4: 0.496607, l5: 0.554450, l6: 0.633066

[epoch:  61/100, batch:   340/  792, ite: 61610] train loss: 4.541313, tar: 0.46

[epoch:  61/100, batch:   420/  792, ite: 61650] train loss: 4.540675, tar: 0.467182 
l0: 0.370134, l1: 0.369836, l2: 0.367054, l3: 0.364052, l4: 0.394415, l5: 0.562277, l6: 0.757565

[epoch:  61/100, batch:   422/  792, ite: 61651] train loss: 4.540325, tar: 0.467123 
l0: 0.353460, l1: 0.353575, l2: 0.354400, l3: 0.350274, l4: 0.368255, l5: 0.440617, l6: 0.516637

[epoch:  61/100, batch:   424/  792, ite: 61652] train loss: 4.539597, tar: 0.467054 
l0: 0.354326, l1: 0.355717, l2: 0.356373, l3: 0.362604, l4: 0.369564, l5: 0.431441, l6: 0.673982

[epoch:  61/100, batch:   426/  792, ite: 61653] train loss: 4.539000, tar: 0.466986 
l0: 0.472422, l1: 0.473445, l2: 0.472488, l3: 0.472634, l4: 0.502060, l5: 0.607413, l6: 0.695367

[epoch:  61/100, batch:   428/  792, ite: 61654] train loss: 4.538948, tar: 0.466990 
l0: 0.340179, l1: 0.342017, l2: 0.339343, l3: 0.341558, l4: 0.389393, l5: 0.455520, l6: 0.676743

[epoch:  61/100, batch:   430/  792, ite: 61655] train loss: 4.538450, tar: 0.46

[epoch:  61/100, batch:   510/  792, ite: 61695] train loss: 4.538573, tar: 0.466924 
l0: 0.478217, l1: 0.480082, l2: 0.479283, l3: 0.486087, l4: 0.515486, l5: 0.720423, l6: 1.159380

[epoch:  61/100, batch:   512/  792, ite: 61696] train loss: 4.539165, tar: 0.466930 
l0: 0.288461, l1: 0.292947, l2: 0.293095, l3: 0.293753, l4: 0.322077, l5: 0.484341, l6: 0.658189

[epoch:  61/100, batch:   514/  792, ite: 61697] train loss: 4.538478, tar: 0.466825 
l0: 0.503091, l1: 0.500857, l2: 0.501483, l3: 0.505457, l4: 0.522204, l5: 0.615649, l6: 0.845582

[epoch:  61/100, batch:   516/  792, ite: 61698] train loss: 4.538678, tar: 0.466846 
l0: 0.541750, l1: 0.546021, l2: 0.547834, l3: 0.558243, l4: 0.595815, l5: 0.674555, l6: 0.839164

[epoch:  61/100, batch:   518/  792, ite: 61699] train loss: 4.539130, tar: 0.466890 
l0: 0.766357, l1: 0.771141, l2: 0.770316, l3: 0.758012, l4: 0.796518, l5: 0.959750, l6: 1.051875

[epoch:  61/100, batch:   520/  792, ite: 61700] train loss: 4.540553, tar: 0.46

[epoch:  61/100, batch:   600/  792, ite: 61740] train loss: 4.542398, tar: 0.467346 
l0: 0.519125, l1: 0.522388, l2: 0.521520, l3: 0.521537, l4: 0.527077, l5: 0.600573, l6: 0.713063

[epoch:  61/100, batch:   602/  792, ite: 61741] train loss: 4.542486, tar: 0.467376 
l0: 0.325065, l1: 0.325560, l2: 0.323789, l3: 0.329207, l4: 0.349540, l5: 0.473541, l6: 0.571215

[epoch:  61/100, batch:   604/  792, ite: 61742] train loss: 4.541766, tar: 0.467294 
l0: 0.357106, l1: 0.355096, l2: 0.355783, l3: 0.366746, l4: 0.371073, l5: 0.492953, l6: 0.578348

[epoch:  61/100, batch:   606/  792, ite: 61743] train loss: 4.541158, tar: 0.467231 
l0: 1.165983, l1: 1.181996, l2: 1.182017, l3: 1.195152, l4: 1.212281, l5: 1.155587, l6: 1.157242

[epoch:  61/100, batch:   608/  792, ite: 61744] train loss: 4.543977, tar: 0.467632 
l0: 1.300788, l1: 1.332212, l2: 1.321331, l3: 1.335509, l4: 1.318771, l5: 1.399234, l6: 1.531170

[epoch:  61/100, batch:   610/  792, ite: 61745] train loss: 4.547743, tar: 0.46

[epoch:  61/100, batch:   690/  792, ite: 61785] train loss: 4.546313, tar: 0.467934 
l0: 0.187809, l1: 0.193138, l2: 0.194551, l3: 0.207490, l4: 0.223834, l5: 0.345856, l6: 0.435766

[epoch:  61/100, batch:   692/  792, ite: 61786] train loss: 4.545033, tar: 0.467777 
l0: 0.442163, l1: 0.441424, l2: 0.440904, l3: 0.443092, l4: 0.482458, l5: 0.634191, l6: 0.791306

[epoch:  61/100, batch:   694/  792, ite: 61787] train loss: 4.544977, tar: 0.467762 
l0: 0.224216, l1: 0.229416, l2: 0.229895, l3: 0.233433, l4: 0.305941, l5: 0.416761, l6: 0.525487

[epoch:  61/100, batch:   696/  792, ite: 61788] train loss: 4.543910, tar: 0.467626 
l0: 0.489459, l1: 0.490984, l2: 0.488990, l3: 0.497432, l4: 0.484114, l5: 0.539980, l6: 0.710051

[epoch:  61/100, batch:   698/  792, ite: 61789] train loss: 4.543914, tar: 0.467638 
l0: 0.567250, l1: 0.568445, l2: 0.570001, l3: 0.575991, l4: 0.577994, l5: 0.619180, l6: 1.041687

[epoch:  61/100, batch:   700/  792, ite: 61790] train loss: 4.544493, tar: 0.46

[epoch:  61/100, batch:   780/  792, ite: 61830] train loss: 4.544809, tar: 0.468015 
l0: 0.493786, l1: 0.501315, l2: 0.500230, l3: 0.498994, l4: 0.528385, l5: 0.572861, l6: 0.748844

[epoch:  61/100, batch:   782/  792, ite: 61831] train loss: 4.544844, tar: 0.468029 
l0: 0.621997, l1: 0.618984, l2: 0.616500, l3: 0.614695, l4: 0.608568, l5: 0.626978, l6: 0.728687

[epoch:  61/100, batch:   784/  792, ite: 61832] train loss: 4.545257, tar: 0.468113 
l0: 0.413781, l1: 0.412050, l2: 0.414116, l3: 0.419065, l4: 0.429285, l5: 0.516562, l6: 0.656470

[epoch:  61/100, batch:   786/  792, ite: 61833] train loss: 4.544960, tar: 0.468083 
l0: 0.532938, l1: 0.542574, l2: 0.543893, l3: 0.549873, l4: 0.557790, l5: 0.569231, l6: 0.777015

[epoch:  61/100, batch:   788/  792, ite: 61834] train loss: 4.545191, tar: 0.468118 
l0: 0.309396, l1: 0.311587, l2: 0.313844, l3: 0.319079, l4: 0.355178, l5: 0.466876, l6: 0.608228

[epoch:  61/100, batch:   790/  792, ite: 61835] train loss: 4.544534, tar: 0.46

l0: 0.613120, l1: 0.616075, l2: 0.616175, l3: 0.611784, l4: 0.641513, l5: 0.743173, l6: 0.927087

[epoch:  62/100, batch:    78/  792, ite: 61875] train loss: 4.539304, tar: 0.467401 
l0: 0.616510, l1: 0.619490, l2: 0.622831, l3: 0.632316, l4: 0.642427, l5: 0.662876, l6: 0.824224

[epoch:  62/100, batch:    80/  792, ite: 61876] train loss: 4.539857, tar: 0.467480 
l0: 0.660867, l1: 0.663532, l2: 0.666054, l3: 0.682434, l4: 0.722212, l5: 0.902540, l6: 1.053786

[epoch:  62/100, batch:    82/  792, ite: 61877] train loss: 4.540815, tar: 0.467583 
l0: 0.927209, l1: 0.951148, l2: 0.948411, l3: 0.953350, l4: 0.987151, l5: 0.971134, l6: 1.043631

[epoch:  62/100, batch:    84/  792, ite: 61878] train loss: 4.542622, tar: 0.467828 
l0: 0.535145, l1: 0.532521, l2: 0.535746, l3: 0.542744, l4: 0.558851, l5: 0.661315, l6: 0.945977

[epoch:  62/100, batch:    86/  792, ite: 61879] train loss: 4.543069, tar: 0.467864 
l0: 0.411186, l1: 0.409089, l2: 0.408456, l3: 0.414089, l4: 0.428668, l5: 0.5469

[epoch:  62/100, batch:   166/  792, ite: 61919] train loss: 4.541983, tar: 0.467550 
l0: 1.597869, l1: 1.605219, l2: 1.617568, l3: 1.662124, l4: 1.681446, l5: 1.760759, l6: 1.992071

[epoch:  62/100, batch:   168/  792, ite: 61920] train loss: 4.546832, tar: 0.468138 
l0: 0.272713, l1: 0.274887, l2: 0.277037, l3: 0.287466, l4: 0.353958, l5: 0.482235, l6: 0.623782

[epoch:  62/100, batch:   170/  792, ite: 61921] train loss: 4.546136, tar: 0.468037 
l0: 0.409357, l1: 0.413137, l2: 0.414239, l3: 0.417957, l4: 0.453802, l5: 0.542843, l6: 0.590335

[epoch:  62/100, batch:   172/  792, ite: 61922] train loss: 4.545802, tar: 0.468006 
l0: 0.485383, l1: 0.483104, l2: 0.482542, l3: 0.483938, l4: 0.513797, l5: 0.611602, l6: 0.759623

[epoch:  62/100, batch:   174/  792, ite: 61923] train loss: 4.545843, tar: 0.468015 
l0: 0.439855, l1: 0.446144, l2: 0.443698, l3: 0.458680, l4: 0.506930, l5: 0.609897, l6: 0.680929

[epoch:  62/100, batch:   176/  792, ite: 61924] train loss: 4.545735, tar: 0.46

[epoch:  62/100, batch:   256/  792, ite: 61964] train loss: 4.541872, tar: 0.467687 
l0: 0.882834, l1: 0.885882, l2: 0.889418, l3: 0.892131, l4: 0.911155, l5: 0.932653, l6: 1.244669

[epoch:  62/100, batch:   258/  792, ite: 61965] train loss: 4.543668, tar: 0.467898 
l0: 0.248886, l1: 0.249837, l2: 0.246551, l3: 0.249270, l4: 0.273741, l5: 0.360732, l6: 0.482459

[epoch:  62/100, batch:   260/  792, ite: 61966] train loss: 4.542691, tar: 0.467787 
l0: 0.680613, l1: 0.693448, l2: 0.691774, l3: 0.690612, l4: 0.681819, l5: 0.718340, l6: 0.795084

[epoch:  62/100, batch:   262/  792, ite: 61967] train loss: 4.543275, tar: 0.467895 
l0: 0.647382, l1: 0.654722, l2: 0.647372, l3: 0.649866, l4: 0.649021, l5: 0.722805, l6: 0.904151

[epoch:  62/100, batch:   264/  792, ite: 61968] train loss: 4.544094, tar: 0.467986 
l0: 0.359435, l1: 0.358773, l2: 0.359281, l3: 0.357374, l4: 0.374118, l5: 0.392122, l6: 0.466848

[epoch:  62/100, batch:   266/  792, ite: 61969] train loss: 4.543409, tar: 0.46

[epoch:  62/100, batch:   346/  792, ite: 62009] train loss: 4.402168, tar: 0.480051 
l0: 0.794288, l1: 0.787641, l2: 0.785198, l3: 0.792507, l4: 0.782097, l5: 0.762764, l6: 0.944537

[epoch:  62/100, batch:   348/  792, ite: 62010] train loss: 4.629530, tar: 0.511474 
l0: 0.449971, l1: 0.446392, l2: 0.446397, l3: 0.446463, l4: 0.488509, l5: 0.656530, l6: 0.775715

[epoch:  62/100, batch:   350/  792, ite: 62011] train loss: 4.617388, tar: 0.505883 
l0: 0.768431, l1: 0.782714, l2: 0.777566, l3: 0.753996, l4: 0.780437, l5: 0.748068, l6: 0.901117

[epoch:  62/100, batch:   352/  792, ite: 62012] train loss: 4.768738, tar: 0.527762 
l0: 0.293729, l1: 0.297623, l2: 0.298675, l3: 0.308054, l4: 0.347432, l5: 0.512874, l6: 0.613645

[epoch:  62/100, batch:   354/  792, ite: 62013] train loss: 4.661674, tar: 0.509760 
l0: 0.871405, l1: 0.877522, l2: 0.880255, l3: 0.881745, l4: 0.895589, l5: 0.935090, l6: 1.009495

[epoch:  62/100, batch:   356/  792, ite: 62014] train loss: 4.861333, tar: 0.53

[epoch:  62/100, batch:   436/  792, ite: 62054] train loss: 4.503366, tar: 0.475671 
l0: 0.368791, l1: 0.369281, l2: 0.370302, l3: 0.375807, l4: 0.407972, l5: 0.502290, l6: 0.622817

[epoch:  62/100, batch:   438/  792, ite: 62055] train loss: 4.488638, tar: 0.473727 
l0: 0.310659, l1: 0.315205, l2: 0.317716, l3: 0.327600, l4: 0.338746, l5: 0.520241, l6: 0.748781

[epoch:  62/100, batch:   440/  792, ite: 62056] train loss: 4.475462, tar: 0.470816 
l0: 0.301195, l1: 0.304343, l2: 0.304515, l3: 0.308567, l4: 0.339331, l5: 0.439641, l6: 0.642171

[epoch:  62/100, batch:   442/  792, ite: 62057] train loss: 4.455084, tar: 0.467840 
l0: 0.227182, l1: 0.235103, l2: 0.234420, l3: 0.239331, l4: 0.267143, l5: 0.387446, l6: 0.516388

[epoch:  62/100, batch:   444/  792, ite: 62058] train loss: 4.424451, tar: 0.463690 
l0: 0.391339, l1: 0.394142, l2: 0.393475, l3: 0.406131, l4: 0.449126, l5: 0.500674, l6: 0.716665

[epoch:  62/100, batch:   446/  792, ite: 62059] train loss: 4.417966, tar: 0.46

[epoch:  62/100, batch:   526/  792, ite: 62099] train loss: 4.583269, tar: 0.480495 
l0: 0.316146, l1: 0.316164, l2: 0.314938, l3: 0.317219, l4: 0.346892, l5: 0.439349, l6: 0.528734

[epoch:  62/100, batch:   528/  792, ite: 62100] train loss: 4.569677, tar: 0.478852 
l0: 0.617771, l1: 0.616587, l2: 0.614913, l3: 0.615936, l4: 0.629163, l5: 0.681917, l6: 0.928238

[epoch:  62/100, batch:   530/  792, ite: 62101] train loss: 4.580446, tar: 0.480227 
l0: 0.468872, l1: 0.481656, l2: 0.479370, l3: 0.477275, l4: 0.481825, l5: 0.567208, l6: 0.836057

[epoch:  62/100, batch:   532/  792, ite: 62102] train loss: 4.581058, tar: 0.480116 
l0: 0.195929, l1: 0.203451, l2: 0.204187, l3: 0.211129, l4: 0.265294, l5: 0.366490, l6: 0.392167

[epoch:  62/100, batch:   534/  792, ite: 62103] train loss: 4.558655, tar: 0.477357 
l0: 0.518741, l1: 0.515627, l2: 0.518596, l3: 0.521913, l4: 0.552293, l5: 0.611773, l6: 0.702564

[epoch:  62/100, batch:   536/  792, ite: 62104] train loss: 4.559778, tar: 0.47

[epoch:  62/100, batch:   616/  792, ite: 62144] train loss: 4.492832, tar: 0.467408 
l0: 1.004931, l1: 1.025421, l2: 1.032748, l3: 1.035839, l4: 1.028981, l5: 1.157932, l6: 1.186243

[epoch:  62/100, batch:   618/  792, ite: 62145] train loss: 4.522901, tar: 0.471115 
l0: 0.334219, l1: 0.336750, l2: 0.334558, l3: 0.345426, l4: 0.372478, l5: 0.478587, l6: 0.642898

[epoch:  62/100, batch:   620/  792, ite: 62146] train loss: 4.516541, tar: 0.470178 
l0: 0.275452, l1: 0.276939, l2: 0.277000, l3: 0.289449, l4: 0.310856, l5: 0.440436, l6: 0.509944

[epoch:  62/100, batch:   622/  792, ite: 62147] train loss: 4.505858, tar: 0.468853 
l0: 0.345554, l1: 0.345090, l2: 0.344217, l3: 0.346748, l4: 0.365572, l5: 0.480939, l6: 0.614231

[epoch:  62/100, batch:   624/  792, ite: 62148] train loss: 4.499416, tar: 0.468020 
l0: 0.449788, l1: 0.458216, l2: 0.457965, l3: 0.455644, l4: 0.472790, l5: 0.491283, l6: 0.593635

[epoch:  62/100, batch:   626/  792, ite: 62149] train loss: 4.496663, tar: 0.46

[epoch:  62/100, batch:   706/  792, ite: 62189] train loss: 4.483121, tar: 0.461765 
l0: 0.722711, l1: 0.724505, l2: 0.731563, l3: 0.730205, l4: 0.732586, l5: 0.899677, l6: 1.240451

[epoch:  62/100, batch:   708/  792, ite: 62190] train loss: 4.497266, tar: 0.463139 
l0: 0.796215, l1: 0.794668, l2: 0.796844, l3: 0.793077, l4: 0.821563, l5: 0.822966, l6: 0.918188

[epoch:  62/100, batch:   710/  792, ite: 62191] train loss: 4.508530, tar: 0.464883 
l0: 0.252193, l1: 0.251274, l2: 0.250133, l3: 0.260255, l4: 0.289535, l5: 0.384318, l6: 0.561652

[epoch:  62/100, batch:   712/  792, ite: 62192] train loss: 4.499843, tar: 0.463775 
l0: 0.315297, l1: 0.315786, l2: 0.314426, l3: 0.313690, l4: 0.355732, l5: 0.499381, l6: 0.690443

[epoch:  62/100, batch:   714/  792, ite: 62193] train loss: 4.494794, tar: 0.463006 
l0: 0.256765, l1: 0.260255, l2: 0.260415, l3: 0.255347, l4: 0.282428, l5: 0.395910, l6: 0.502842

[epoch:  62/100, batch:   716/  792, ite: 62194] train loss: 4.485922, tar: 0.46

l0: 0.309572, l1: 0.314648, l2: 0.314436, l3: 0.317965, l4: 0.364568, l5: 0.456795, l6: 0.737704

[epoch:  63/100, batch:     4/  792, ite: 62234] train loss: 4.490653, tar: 0.463127 
l0: 0.664967, l1: 0.672116, l2: 0.675109, l3: 0.676699, l4: 0.672357, l5: 0.731760, l6: 0.912990

[epoch:  63/100, batch:     6/  792, ite: 62235] train loss: 4.496691, tar: 0.463986 
l0: 0.526230, l1: 0.531288, l2: 0.532057, l3: 0.543859, l4: 0.576121, l5: 0.676488, l6: 1.058542

[epoch:  63/100, batch:     8/  792, ite: 62236] train loss: 4.501362, tar: 0.464250 
l0: 0.215859, l1: 0.217438, l2: 0.219056, l3: 0.215186, l4: 0.237666, l5: 0.322498, l6: 0.488425

[epoch:  63/100, batch:    10/  792, ite: 62237] train loss: 4.492616, tar: 0.463202 
l0: 0.283393, l1: 0.287964, l2: 0.290047, l3: 0.304919, l4: 0.328229, l5: 0.366423, l6: 0.451247

[epoch:  63/100, batch:    12/  792, ite: 62238] train loss: 4.485640, tar: 0.462446 
l0: 0.442955, l1: 0.446511, l2: 0.445871, l3: 0.457220, l4: 0.487418, l5: 0.5589

[epoch:  63/100, batch:    92/  792, ite: 62278] train loss: 4.484383, tar: 0.461046 
l0: 0.772500, l1: 0.777004, l2: 0.776701, l3: 0.771678, l4: 0.804379, l5: 0.868663, l6: 0.991281

[epoch:  63/100, batch:    94/  792, ite: 62279] train loss: 4.492598, tar: 0.462163 
l0: 0.366902, l1: 0.368653, l2: 0.371243, l3: 0.376347, l4: 0.389137, l5: 0.432392, l6: 0.607452

[epoch:  63/100, batch:    96/  792, ite: 62280] train loss: 4.489597, tar: 0.461822 
l0: 0.244410, l1: 0.251866, l2: 0.254037, l3: 0.264195, l4: 0.300066, l5: 0.415795, l6: 0.587809

[epoch:  63/100, batch:    98/  792, ite: 62281] train loss: 4.484296, tar: 0.461049 
l0: 0.409726, l1: 0.412345, l2: 0.413469, l3: 0.424087, l4: 0.461420, l5: 0.553510, l6: 0.735708

[epoch:  63/100, batch:   100/  792, ite: 62282] train loss: 4.483125, tar: 0.460867 
l0: 0.596852, l1: 0.613328, l2: 0.611269, l3: 0.622018, l4: 0.652378, l5: 0.726264, l6: 0.851178

[epoch:  63/100, batch:   102/  792, ite: 62283] train loss: 4.487466, tar: 0.46

[epoch:  63/100, batch:   182/  792, ite: 62323] train loss: 4.468064, tar: 0.458480 
l0: 0.486648, l1: 0.485373, l2: 0.494039, l3: 0.497696, l4: 0.528708, l5: 0.591310, l6: 0.730096

[epoch:  63/100, batch:   184/  792, ite: 62324] train loss: 4.468631, tar: 0.458567 
l0: 0.351734, l1: 0.358167, l2: 0.359639, l3: 0.360849, l4: 0.404277, l5: 0.548959, l6: 0.746501

[epoch:  63/100, batch:   186/  792, ite: 62325] train loss: 4.466841, tar: 0.458238 
l0: 0.744718, l1: 0.748883, l2: 0.750346, l3: 0.760233, l4: 0.793848, l5: 0.820739, l6: 1.270468

[epoch:  63/100, batch:   188/  792, ite: 62326] train loss: 4.475504, tar: 0.459117 
l0: 0.350803, l1: 0.347351, l2: 0.351607, l3: 0.363224, l4: 0.357437, l5: 0.413242, l6: 0.450341

[epoch:  63/100, batch:   190/  792, ite: 62327] train loss: 4.471421, tar: 0.458785 
l0: 0.454271, l1: 0.457191, l2: 0.457427, l3: 0.463973, l4: 0.505887, l5: 0.560502, l6: 0.663748

[epoch:  63/100, batch:   192/  792, ite: 62328] train loss: 4.470751, tar: 0.45

[epoch:  63/100, batch:   272/  792, ite: 62368] train loss: 4.439782, tar: 0.455477 
l0: 0.697335, l1: 0.696688, l2: 0.696758, l3: 0.696954, l4: 0.726736, l5: 0.879288, l6: 0.989343

[epoch:  63/100, batch:   274/  792, ite: 62369] train loss: 4.444772, tar: 0.456132 
l0: 0.453173, l1: 0.458120, l2: 0.454381, l3: 0.458606, l4: 0.488310, l5: 0.559666, l6: 0.702083

[epoch:  63/100, batch:   276/  792, ite: 62370] train loss: 4.444442, tar: 0.456124 
l0: 0.529002, l1: 0.533691, l2: 0.535339, l3: 0.529616, l4: 0.542704, l5: 0.675942, l6: 0.792666

[epoch:  63/100, batch:   278/  792, ite: 62371] train loss: 4.445841, tar: 0.456321 
l0: 0.429065, l1: 0.426727, l2: 0.423652, l3: 0.429404, l4: 0.444189, l5: 0.599729, l6: 0.731831

[epoch:  63/100, batch:   280/  792, ite: 62372] train loss: 4.445712, tar: 0.456248 
l0: 0.332674, l1: 0.334378, l2: 0.335614, l3: 0.353698, l4: 0.387561, l5: 0.508963, l6: 0.700741

[epoch:  63/100, batch:   282/  792, ite: 62373] train loss: 4.443745, tar: 0.45

[epoch:  63/100, batch:   362/  792, ite: 62413] train loss: 4.418679, tar: 0.452836 
l0: 0.512376, l1: 0.516110, l2: 0.517290, l3: 0.517603, l4: 0.527961, l5: 0.481956, l6: 0.645588

[epoch:  63/100, batch:   364/  792, ite: 62414] train loss: 4.418733, tar: 0.452980 
l0: 0.718593, l1: 0.721591, l2: 0.719119, l3: 0.717058, l4: 0.716094, l5: 0.728672, l6: 0.771665

[epoch:  63/100, batch:   366/  792, ite: 62415] train loss: 4.422416, tar: 0.453620 
l0: 0.375320, l1: 0.379807, l2: 0.378901, l3: 0.385263, l4: 0.404727, l5: 0.480951, l6: 0.605256

[epoch:  63/100, batch:   368/  792, ite: 62416] train loss: 4.420567, tar: 0.453431 
l0: 0.571742, l1: 0.577040, l2: 0.578661, l3: 0.575662, l4: 0.576427, l5: 0.664470, l6: 0.795851

[epoch:  63/100, batch:   370/  792, ite: 62417] train loss: 4.422495, tar: 0.453715 
l0: 0.563968, l1: 0.556392, l2: 0.559118, l3: 0.565794, l4: 0.632146, l5: 0.779638, l6: 0.874468

[epoch:  63/100, batch:   372/  792, ite: 62418] train loss: 4.424883, tar: 0.45

[epoch:  63/100, batch:   452/  792, ite: 62458] train loss: 4.419208, tar: 0.453265 
l0: 0.213118, l1: 0.214222, l2: 0.215987, l3: 0.220104, l4: 0.272557, l5: 0.412257, l6: 0.570445

[epoch:  63/100, batch:   454/  792, ite: 62459] train loss: 4.415341, tar: 0.452741 
l0: 0.470156, l1: 0.467964, l2: 0.461411, l3: 0.459435, l4: 0.459776, l5: 0.550371, l6: 0.613501

[epoch:  63/100, batch:   456/  792, ite: 62460] train loss: 4.414843, tar: 0.452779 
l0: 0.272344, l1: 0.276842, l2: 0.276192, l3: 0.280286, l4: 0.306401, l5: 0.541863, l6: 0.666452

[epoch:  63/100, batch:   458/  792, ite: 62461] train loss: 4.412648, tar: 0.452388 
l0: 0.482959, l1: 0.494365, l2: 0.496449, l3: 0.509488, l4: 0.540121, l5: 0.723964, l6: 0.870399

[epoch:  63/100, batch:   460/  792, ite: 62462] train loss: 4.413715, tar: 0.452454 
l0: 0.413014, l1: 0.413251, l2: 0.415838, l3: 0.421637, l4: 0.439127, l5: 0.536525, l6: 0.697245

[epoch:  63/100, batch:   462/  792, ite: 62463] train loss: 4.412927, tar: 0.45

[epoch:  63/100, batch:   542/  792, ite: 62503] train loss: 4.436191, tar: 0.454864 
l0: 0.215304, l1: 0.217727, l2: 0.226791, l3: 0.228267, l4: 0.266996, l5: 0.388615, l6: 0.497825

[epoch:  63/100, batch:   544/  792, ite: 62504] train loss: 4.432632, tar: 0.454389 
l0: 0.225844, l1: 0.233260, l2: 0.235082, l3: 0.248456, l4: 0.262272, l5: 0.371915, l6: 0.507953

[epoch:  63/100, batch:   546/  792, ite: 62505] train loss: 4.429091, tar: 0.453936 
l0: 0.303290, l1: 0.306603, l2: 0.306718, l3: 0.310140, l4: 0.345498, l5: 0.494557, l6: 0.651562

[epoch:  63/100, batch:   548/  792, ite: 62506] train loss: 4.426932, tar: 0.453639 
l0: 0.461943, l1: 0.476488, l2: 0.477343, l3: 0.479005, l4: 0.521436, l5: 0.558803, l6: 0.738629

[epoch:  63/100, batch:   550/  792, ite: 62507] train loss: 4.427010, tar: 0.453655 
l0: 0.403324, l1: 0.404204, l2: 0.402464, l3: 0.408579, l4: 0.419576, l5: 0.492879, l6: 0.599734

[epoch:  63/100, batch:   552/  792, ite: 62508] train loss: 4.425764, tar: 0.45

[epoch:  63/100, batch:   632/  792, ite: 62548] train loss: 4.459984, tar: 0.457832 
l0: 0.375781, l1: 0.373962, l2: 0.376976, l3: 0.382500, l4: 0.395603, l5: 0.432102, l6: 0.526689

[epoch:  63/100, batch:   634/  792, ite: 62549] train loss: 4.458077, tar: 0.457683 
l0: 0.306175, l1: 0.303821, l2: 0.305267, l3: 0.312494, l4: 0.317398, l5: 0.389775, l6: 0.489424

[epoch:  63/100, batch:   636/  792, ite: 62550] train loss: 4.455401, tar: 0.457407 
l0: 0.623005, l1: 0.627209, l2: 0.628613, l3: 0.625088, l4: 0.651581, l5: 0.774841, l6: 1.120994

[epoch:  63/100, batch:   638/  792, ite: 62551] train loss: 4.458763, tar: 0.457708 
l0: 0.428405, l1: 0.437304, l2: 0.439325, l3: 0.447484, l4: 0.449867, l5: 0.514244, l6: 0.681857

[epoch:  63/100, batch:   640/  792, ite: 62552] train loss: 4.458393, tar: 0.457655 
l0: 0.405217, l1: 0.407878, l2: 0.401097, l3: 0.393263, l4: 0.409431, l5: 0.466156, l6: 0.653800

[epoch:  63/100, batch:   642/  792, ite: 62553] train loss: 4.457222, tar: 0.45

[epoch:  63/100, batch:   722/  792, ite: 62593] train loss: 4.480366, tar: 0.460383 
l0: 0.205998, l1: 0.207541, l2: 0.210072, l3: 0.219652, l4: 0.247925, l5: 0.401733, l6: 0.533913

[epoch:  63/100, batch:   724/  792, ite: 62594] train loss: 4.477185, tar: 0.459955 
l0: 0.357489, l1: 0.358658, l2: 0.358266, l3: 0.364300, l4: 0.389451, l5: 0.497228, l6: 0.672650

[epoch:  63/100, batch:   726/  792, ite: 62595] train loss: 4.475974, tar: 0.459783 
l0: 0.385424, l1: 0.385844, l2: 0.387712, l3: 0.391298, l4: 0.421625, l5: 0.512557, l6: 0.707550

[epoch:  63/100, batch:   728/  792, ite: 62596] train loss: 4.475196, tar: 0.459658 
l0: 0.629465, l1: 0.623566, l2: 0.626073, l3: 0.626410, l4: 0.619134, l5: 0.678208, l6: 0.795150

[epoch:  63/100, batch:   730/  792, ite: 62597] train loss: 4.476867, tar: 0.459943 
l0: 0.268262, l1: 0.278614, l2: 0.275033, l3: 0.283951, l4: 0.301330, l5: 0.420177, l6: 0.588874

[epoch:  63/100, batch:   732/  792, ite: 62598] train loss: 4.474374, tar: 0.45

l0: 0.330561, l1: 0.335489, l2: 0.334676, l3: 0.332361, l4: 0.337042, l5: 0.449880, l6: 0.669450

[epoch:  64/100, batch:    20/  792, ite: 62638] train loss: 4.481007, tar: 0.460253 
l0: 0.490727, l1: 0.499004, l2: 0.501348, l3: 0.502387, l4: 0.516314, l5: 0.574223, l6: 0.605766

[epoch:  64/100, batch:    22/  792, ite: 62639] train loss: 4.480843, tar: 0.460301 
l0: 0.578883, l1: 0.574187, l2: 0.574913, l3: 0.582024, l4: 0.588471, l5: 0.703998, l6: 0.969580

[epoch:  64/100, batch:    24/  792, ite: 62640] train loss: 4.482456, tar: 0.460486 
l0: 0.320983, l1: 0.321938, l2: 0.321900, l3: 0.331197, l4: 0.341274, l5: 0.426605, l6: 0.552463

[epoch:  64/100, batch:    26/  792, ite: 62641] train loss: 4.480605, tar: 0.460268 
l0: 0.632004, l1: 0.639639, l2: 0.637291, l3: 0.644003, l4: 0.681293, l5: 0.694953, l6: 0.793652

[epoch:  64/100, batch:    28/  792, ite: 62642] train loss: 4.482301, tar: 0.460536 
l0: 0.405005, l1: 0.404408, l2: 0.405241, l3: 0.412698, l4: 0.434156, l5: 0.5125

[epoch:  64/100, batch:   108/  792, ite: 62682] train loss: 4.477851, tar: 0.460077 
l0: 0.236153, l1: 0.233850, l2: 0.232589, l3: 0.240826, l4: 0.276465, l5: 0.427743, l6: 0.518994

[epoch:  64/100, batch:   110/  792, ite: 62683] train loss: 4.475168, tar: 0.459749 
l0: 0.357162, l1: 0.359946, l2: 0.361970, l3: 0.361177, l4: 0.391389, l5: 0.507756, l6: 0.612986

[epoch:  64/100, batch:   112/  792, ite: 62684] train loss: 4.474010, tar: 0.459599 
l0: 0.685953, l1: 0.686401, l2: 0.687032, l3: 0.682322, l4: 0.692648, l5: 0.794692, l6: 1.043748

[epoch:  64/100, batch:   114/  792, ite: 62685] train loss: 4.476870, tar: 0.459930 
l0: 0.680797, l1: 0.686650, l2: 0.683194, l3: 0.670087, l4: 0.688290, l5: 0.789994, l6: 1.049044

[epoch:  64/100, batch:   116/  792, ite: 62686] train loss: 4.479412, tar: 0.460252 
l0: 0.474695, l1: 0.473326, l2: 0.473580, l3: 0.484315, l4: 0.519489, l5: 0.654125, l6: 0.788908

[epoch:  64/100, batch:   118/  792, ite: 62687] train loss: 4.479905, tar: 0.46

[epoch:  64/100, batch:   198/  792, ite: 62727] train loss: 4.486339, tar: 0.461719 
l0: 0.314846, l1: 0.313432, l2: 0.316626, l3: 0.320609, l4: 0.369494, l5: 0.510764, l6: 0.633600

[epoch:  64/100, batch:   200/  792, ite: 62728] train loss: 4.484928, tar: 0.461518 
l0: 0.542960, l1: 0.554556, l2: 0.549820, l3: 0.559571, l4: 0.573493, l5: 0.716244, l6: 0.864588

[epoch:  64/100, batch:   202/  792, ite: 62729] train loss: 4.485957, tar: 0.461629 
l0: 0.414792, l1: 0.429680, l2: 0.429981, l3: 0.436118, l4: 0.486385, l5: 0.618863, l6: 0.798638

[epoch:  64/100, batch:   204/  792, ite: 62730] train loss: 4.485988, tar: 0.461565 
l0: 0.466722, l1: 0.467285, l2: 0.468538, l3: 0.475472, l4: 0.472315, l5: 0.527952, l6: 0.706407

[epoch:  64/100, batch:   206/  792, ite: 62731] train loss: 4.485800, tar: 0.461572 
l0: 0.406159, l1: 0.406290, l2: 0.401031, l3: 0.405589, l4: 0.446276, l5: 0.656225, l6: 0.844277

[epoch:  64/100, batch:   208/  792, ite: 62732] train loss: 4.485837, tar: 0.46

[epoch:  64/100, batch:   288/  792, ite: 62772] train loss: 4.476165, tar: 0.460586 
l0: 0.709827, l1: 0.712479, l2: 0.716481, l3: 0.724266, l4: 0.754577, l5: 0.814527, l6: 0.957139

[epoch:  64/100, batch:   290/  792, ite: 62773] train loss: 4.478622, tar: 0.460909 
l0: 0.352302, l1: 0.354787, l2: 0.355868, l3: 0.359322, l4: 0.377917, l5: 0.508368, l6: 0.733932

[epoch:  64/100, batch:   292/  792, ite: 62774] train loss: 4.477764, tar: 0.460768 
l0: 0.285267, l1: 0.287353, l2: 0.290457, l3: 0.289097, l4: 0.326627, l5: 0.449255, l6: 0.648527

[epoch:  64/100, batch:   294/  792, ite: 62775] train loss: 4.476254, tar: 0.460542 
l0: 0.378869, l1: 0.384329, l2: 0.383612, l3: 0.388372, l4: 0.414687, l5: 0.480862, l6: 0.576989

[epoch:  64/100, batch:   296/  792, ite: 62776] train loss: 4.475223, tar: 0.460437 
l0: 0.411227, l1: 0.414342, l2: 0.414374, l3: 0.419541, l4: 0.437701, l5: 0.539462, l6: 0.808174

[epoch:  64/100, batch:   298/  792, ite: 62777] train loss: 4.475156, tar: 0.46

[epoch:  64/100, batch:   378/  792, ite: 62817] train loss: 4.464123, tar: 0.459146 
l0: 0.371519, l1: 0.374204, l2: 0.376100, l3: 0.382835, l4: 0.417040, l5: 0.480586, l6: 0.609967

[epoch:  64/100, batch:   380/  792, ite: 62818] train loss: 4.463167, tar: 0.459039 
l0: 0.698898, l1: 0.707463, l2: 0.709336, l3: 0.711200, l4: 0.736151, l5: 0.799762, l6: 0.899258

[epoch:  64/100, batch:   382/  792, ite: 62819] train loss: 4.465363, tar: 0.459332 
l0: 0.775826, l1: 0.784449, l2: 0.784542, l3: 0.779254, l4: 0.757036, l5: 0.783084, l6: 0.820112

[epoch:  64/100, batch:   384/  792, ite: 62820] train loss: 4.467656, tar: 0.459718 
l0: 0.371102, l1: 0.370001, l2: 0.370496, l3: 0.371153, l4: 0.377374, l5: 0.481863, l6: 0.579989

[epoch:  64/100, batch:   386/  792, ite: 62821] train loss: 4.466619, tar: 0.459610 
l0: 0.218154, l1: 0.219698, l2: 0.220730, l3: 0.227576, l4: 0.255444, l5: 0.372398, l6: 0.489238

[epoch:  64/100, batch:   388/  792, ite: 62822] train loss: 4.464264, tar: 0.45

[epoch:  64/100, batch:   468/  792, ite: 62862] train loss: 4.493097, tar: 0.462269 
l0: 0.335931, l1: 0.341699, l2: 0.342595, l3: 0.357902, l4: 0.407803, l5: 0.493992, l6: 0.652686

[epoch:  64/100, batch:   470/  792, ite: 62863] train loss: 4.492037, tar: 0.462123 
l0: 0.401947, l1: 0.403996, l2: 0.403882, l3: 0.409370, l4: 0.436289, l5: 0.626528, l6: 0.808928

[epoch:  64/100, batch:   472/  792, ite: 62864] train loss: 4.491853, tar: 0.462053 
l0: 1.224829, l1: 1.238455, l2: 1.231636, l3: 1.247518, l4: 1.281848, l5: 1.293790, l6: 1.251531

[epoch:  64/100, batch:   474/  792, ite: 62865] train loss: 4.498262, tar: 0.462935 
l0: 0.770893, l1: 0.774302, l2: 0.784428, l3: 0.799393, l4: 0.821920, l5: 0.783583, l6: 0.897530

[epoch:  64/100, batch:   476/  792, ite: 62866] train loss: 4.500700, tar: 0.463291 
l0: 0.247623, l1: 0.245229, l2: 0.245963, l3: 0.249275, l4: 0.291008, l5: 0.430796, l6: 0.545122

[epoch:  64/100, batch:   478/  792, ite: 62867] train loss: 4.498853, tar: 0.46

[epoch:  64/100, batch:   558/  792, ite: 62907] train loss: 4.490307, tar: 0.462065 
l0: 0.451173, l1: 0.453032, l2: 0.453485, l3: 0.459760, l4: 0.471120, l5: 0.561328, l6: 0.624622

[epoch:  64/100, batch:   560/  792, ite: 62908] train loss: 4.489920, tar: 0.462053 
l0: 0.631354, l1: 0.636953, l2: 0.638183, l3: 0.647605, l4: 0.648227, l5: 0.707873, l6: 0.852581

[epoch:  64/100, batch:   562/  792, ite: 62909] train loss: 4.491295, tar: 0.462239 
l0: 0.447672, l1: 0.455515, l2: 0.451269, l3: 0.453982, l4: 0.490424, l5: 0.594704, l6: 0.756702

[epoch:  64/100, batch:   564/  792, ite: 62910] train loss: 4.491338, tar: 0.462223 
l0: 0.480389, l1: 0.480597, l2: 0.482815, l3: 0.482459, l4: 0.510270, l5: 0.674604, l6: 0.839332

[epoch:  64/100, batch:   566/  792, ite: 62911] train loss: 4.491822, tar: 0.462243 
l0: 0.378507, l1: 0.380869, l2: 0.381776, l3: 0.396209, l4: 0.432941, l5: 0.581566, l6: 0.746766

[epoch:  64/100, batch:   568/  792, ite: 62912] train loss: 4.491285, tar: 0.46

[epoch:  64/100, batch:   648/  792, ite: 62952] train loss: 4.478045, tar: 0.460481 
l0: 0.340315, l1: 0.344625, l2: 0.345830, l3: 0.344228, l4: 0.360914, l5: 0.427146, l6: 0.568866

[epoch:  64/100, batch:   650/  792, ite: 62953] train loss: 4.476807, tar: 0.460355 
l0: 1.040572, l1: 1.038505, l2: 1.049799, l3: 1.048957, l4: 1.038275, l5: 1.052639, l6: 1.157391

[epoch:  64/100, batch:   652/  792, ite: 62954] train loss: 4.481294, tar: 0.460963 
l0: 0.412015, l1: 0.420829, l2: 0.423307, l3: 0.431664, l4: 0.478425, l5: 0.578744, l6: 0.702448

[epoch:  64/100, batch:   654/  792, ite: 62955] train loss: 4.481056, tar: 0.460912 
l0: 0.443383, l1: 0.454591, l2: 0.455581, l3: 0.462455, l4: 0.468341, l5: 0.568008, l6: 0.585320

[epoch:  64/100, batch:   656/  792, ite: 62956] train loss: 4.480698, tar: 0.460893 
l0: 0.314634, l1: 0.319272, l2: 0.322299, l3: 0.315242, l4: 0.332097, l5: 0.362560, l6: 0.467622

[epoch:  64/100, batch:   658/  792, ite: 62957] train loss: 4.479090, tar: 0.46

[epoch:  64/100, batch:   738/  792, ite: 62997] train loss: 4.488524, tar: 0.461372 
l0: 0.379061, l1: 0.380372, l2: 0.383416, l3: 0.380911, l4: 0.384110, l5: 0.530895, l6: 0.527957

[epoch:  64/100, batch:   740/  792, ite: 62998] train loss: 4.487722, tar: 0.461290 
l0: 0.427848, l1: 0.427823, l2: 0.430359, l3: 0.435813, l4: 0.429702, l5: 0.478170, l6: 0.528600

[epoch:  64/100, batch:   742/  792, ite: 62999] train loss: 4.487019, tar: 0.461256 
l0: 0.361235, l1: 0.361277, l2: 0.359519, l3: 0.361505, l4: 0.389715, l5: 0.508863, l6: 0.606586

[epoch:  64/100, batch:   744/  792, ite: 63000] train loss: 4.486123, tar: 0.461156 
l0: 0.474688, l1: 0.481764, l2: 0.480020, l3: 0.483921, l4: 0.526987, l5: 0.661511, l6: 0.942702

[epoch:  64/100, batch:   746/  792, ite: 63001] train loss: 4.486656, tar: 0.461170 
l0: 0.554702, l1: 0.556132, l2: 0.554207, l3: 0.554069, l4: 0.570840, l5: 0.603752, l6: 0.708362

[epoch:  64/100, batch:   748/  792, ite: 63002] train loss: 4.487018, tar: 0.46

l0: 0.691245, l1: 0.692214, l2: 0.690649, l3: 0.691272, l4: 0.738423, l5: 0.795998, l6: 0.919875

[epoch:  65/100, batch:    36/  792, ite: 63042] train loss: 4.480698, tar: 0.460402 
l0: 0.533456, l1: 0.538386, l2: 0.537858, l3: 0.534896, l4: 0.554773, l5: 0.674379, l6: 1.035447

[epoch:  65/100, batch:    38/  792, ite: 63043] train loss: 4.481791, tar: 0.460472 
l0: 0.459659, l1: 0.462172, l2: 0.460833, l3: 0.467923, l4: 0.492910, l5: 0.557171, l6: 0.708321

[epoch:  65/100, batch:    40/  792, ite: 63044] train loss: 4.481824, tar: 0.460471 
l0: 0.415400, l1: 0.415033, l2: 0.418370, l3: 0.432869, l4: 0.443014, l5: 0.508824, l6: 0.673437

[epoch:  65/100, batch:    42/  792, ite: 63045] train loss: 4.481316, tar: 0.460428 
l0: 0.372664, l1: 0.374741, l2: 0.373484, l3: 0.385556, l4: 0.424856, l5: 0.505915, l6: 0.650258

[epoch:  65/100, batch:    44/  792, ite: 63046] train loss: 4.480695, tar: 0.460344 
l0: 0.375857, l1: 0.379509, l2: 0.376322, l3: 0.383606, l4: 0.407859, l5: 0.4978

[epoch:  65/100, batch:   124/  792, ite: 63086] train loss: 4.478862, tar: 0.459994 
l0: 0.697124, l1: 0.697293, l2: 0.698878, l3: 0.704842, l4: 0.748040, l5: 0.759832, l6: 1.490410

[epoch:  65/100, batch:   126/  792, ite: 63087] train loss: 4.481585, tar: 0.460212 
l0: 0.441671, l1: 0.452259, l2: 0.453994, l3: 0.453680, l4: 0.469326, l5: 0.463690, l6: 0.591253

[epoch:  65/100, batch:   128/  792, ite: 63088] train loss: 4.481080, tar: 0.460195 
l0: 0.393766, l1: 0.396578, l2: 0.397071, l3: 0.407435, l4: 0.436571, l5: 0.487411, l6: 0.671567

[epoch:  65/100, batch:   130/  792, ite: 63089] train loss: 4.480523, tar: 0.460134 
l0: 0.656815, l1: 0.657923, l2: 0.653746, l3: 0.654910, l4: 0.678901, l5: 0.754874, l6: 1.061326

[epoch:  65/100, batch:   132/  792, ite: 63090] train loss: 4.482132, tar: 0.460315 
l0: 0.523968, l1: 0.524026, l2: 0.524640, l3: 0.521567, l4: 0.555431, l5: 0.576964, l6: 0.629720

[epoch:  65/100, batch:   134/  792, ite: 63091] train loss: 4.482219, tar: 0.46

[epoch:  65/100, batch:   214/  792, ite: 63131] train loss: 4.478023, tar: 0.459890 
l0: 0.512266, l1: 0.510500, l2: 0.513377, l3: 0.517637, l4: 0.548245, l5: 0.578015, l6: 0.763853

[epoch:  65/100, batch:   216/  792, ite: 63132] train loss: 4.478279, tar: 0.459936 
l0: 0.429848, l1: 0.437577, l2: 0.435037, l3: 0.449740, l4: 0.468672, l5: 0.529408, l6: 0.800859

[epoch:  65/100, batch:   218/  792, ite: 63133] train loss: 4.478370, tar: 0.459910 
l0: 0.260620, l1: 0.257479, l2: 0.257380, l3: 0.267497, l4: 0.298900, l5: 0.426437, l6: 0.524504

[epoch:  65/100, batch:   220/  792, ite: 63134] train loss: 4.476912, tar: 0.459734 
l0: 0.275116, l1: 0.277576, l2: 0.281030, l3: 0.278828, l4: 0.299699, l5: 0.377342, l6: 0.582897

[epoch:  65/100, batch:   222/  792, ite: 63135] train loss: 4.475576, tar: 0.459571 
l0: 0.495389, l1: 0.498595, l2: 0.498813, l3: 0.499202, l4: 0.515330, l5: 0.639557, l6: 0.766933

[epoch:  65/100, batch:   224/  792, ite: 63136] train loss: 4.475795, tar: 0.45

[epoch:  65/100, batch:   304/  792, ite: 63176] train loss: 4.461133, tar: 0.457436 
l0: 0.866903, l1: 0.872254, l2: 0.874884, l3: 0.878732, l4: 0.904141, l5: 0.918464, l6: 1.076584

[epoch:  65/100, batch:   306/  792, ite: 63177] train loss: 4.463839, tar: 0.457784 
l0: 0.506740, l1: 0.514698, l2: 0.513373, l3: 0.509154, l4: 0.535274, l5: 0.690442, l6: 0.744276

[epoch:  65/100, batch:   308/  792, ite: 63178] train loss: 4.464160, tar: 0.457825 
l0: 0.445062, l1: 0.446494, l2: 0.446384, l3: 0.448070, l4: 0.464495, l5: 0.541981, l6: 0.705161

[epoch:  65/100, batch:   310/  792, ite: 63179] train loss: 4.464015, tar: 0.457814 
l0: 0.440205, l1: 0.443435, l2: 0.443188, l3: 0.448311, l4: 0.473647, l5: 0.555953, l6: 0.686383

[epoch:  65/100, batch:   312/  792, ite: 63180] train loss: 4.463754, tar: 0.457799 
l0: 0.347668, l1: 0.357495, l2: 0.356648, l3: 0.373652, l4: 0.413826, l5: 0.512025, l6: 0.601822

[epoch:  65/100, batch:   314/  792, ite: 63181] train loss: 4.463048, tar: 0.45

[epoch:  65/100, batch:   394/  792, ite: 63221] train loss: 4.452367, tar: 0.456599 
l0: 0.868777, l1: 0.877465, l2: 0.869882, l3: 0.884167, l4: 0.920105, l5: 0.941119, l6: 0.986897

[epoch:  65/100, batch:   396/  792, ite: 63222] train loss: 4.454816, tar: 0.456937 
l0: 0.533685, l1: 0.534226, l2: 0.531682, l3: 0.544568, l4: 0.577832, l5: 0.730136, l6: 0.931690

[epoch:  65/100, batch:   398/  792, ite: 63223] train loss: 4.455455, tar: 0.456999 
l0: 0.405630, l1: 0.410920, l2: 0.411177, l3: 0.417456, l4: 0.418294, l5: 0.496080, l6: 0.642947

[epoch:  65/100, batch:   400/  792, ite: 63224] train loss: 4.454978, tar: 0.456957 
l0: 0.201231, l1: 0.206887, l2: 0.208680, l3: 0.206663, l4: 0.241315, l5: 0.301034, l6: 0.385838

[epoch:  65/100, batch:   402/  792, ite: 63225] train loss: 4.453127, tar: 0.456749 
l0: 0.365447, l1: 0.361668, l2: 0.361458, l3: 0.364348, l4: 0.378258, l5: 0.473486, l6: 0.626726

[epoch:  65/100, batch:   404/  792, ite: 63226] train loss: 4.452435, tar: 0.45

[epoch:  65/100, batch:   484/  792, ite: 63266] train loss: 4.455980, tar: 0.456813 
l0: 0.244040, l1: 0.250150, l2: 0.248468, l3: 0.246828, l4: 0.269293, l5: 0.361089, l6: 0.498382

[epoch:  65/100, batch:   486/  792, ite: 63267] train loss: 4.454538, tar: 0.456645 
l0: 0.181489, l1: 0.186802, l2: 0.187794, l3: 0.194355, l4: 0.247087, l5: 0.352407, l6: 0.434920

[epoch:  65/100, batch:   488/  792, ite: 63268] train loss: 4.452799, tar: 0.456428 
l0: 0.382156, l1: 0.385718, l2: 0.383649, l3: 0.390672, l4: 0.429159, l5: 0.550895, l6: 0.683833

[epoch:  65/100, batch:   490/  792, ite: 63269] train loss: 4.452386, tar: 0.456369 
l0: 0.450104, l1: 0.454084, l2: 0.456234, l3: 0.461518, l4: 0.477621, l5: 0.521563, l6: 0.607998

[epoch:  65/100, batch:   492/  792, ite: 63270] train loss: 4.452108, tar: 0.456364 
l0: 0.264812, l1: 0.264523, l2: 0.262944, l3: 0.270439, l4: 0.309631, l5: 0.404522, l6: 0.549796

[epoch:  65/100, batch:   494/  792, ite: 63271] train loss: 4.450916, tar: 0.45

[epoch:  65/100, batch:   574/  792, ite: 63311] train loss: 4.461245, tar: 0.457402 
l0: 0.464917, l1: 0.470722, l2: 0.469292, l3: 0.468426, l4: 0.509810, l5: 0.691497, l6: 1.008312

[epoch:  65/100, batch:   576/  792, ite: 63312] train loss: 4.461681, tar: 0.457408 
l0: 0.357929, l1: 0.357619, l2: 0.357419, l3: 0.358266, l4: 0.386133, l5: 0.478439, l6: 0.627007

[epoch:  65/100, batch:   578/  792, ite: 63313] train loss: 4.461060, tar: 0.457332 
l0: 0.468305, l1: 0.473865, l2: 0.477246, l3: 0.478384, l4: 0.503685, l5: 0.615389, l6: 0.746898

[epoch:  65/100, batch:   580/  792, ite: 63314] train loss: 4.461259, tar: 0.457340 
l0: 0.558363, l1: 0.562565, l2: 0.561238, l3: 0.559626, l4: 0.597934, l5: 0.732404, l6: 0.931195

[epoch:  65/100, batch:   582/  792, ite: 63315] train loss: 4.462038, tar: 0.457417 
l0: 0.298571, l1: 0.296057, l2: 0.299809, l3: 0.302487, l4: 0.328770, l5: 0.494655, l6: 0.588869

[epoch:  65/100, batch:   584/  792, ite: 63316] train loss: 4.461226, tar: 0.45

[epoch:  65/100, batch:   664/  792, ite: 63356] train loss: 4.474191, tar: 0.459216 
l0: 0.304509, l1: 0.309012, l2: 0.307633, l3: 0.310712, l4: 0.326532, l5: 0.451099, l6: 0.538226

[epoch:  65/100, batch:   666/  792, ite: 63357] train loss: 4.473203, tar: 0.459102 
l0: 0.566969, l1: 0.546451, l2: 0.549164, l3: 0.537838, l4: 0.517959, l5: 0.537237, l6: 0.677170

[epoch:  65/100, batch:   668/  792, ite: 63358] train loss: 4.473363, tar: 0.459181 
l0: 0.720658, l1: 0.724175, l2: 0.717754, l3: 0.712604, l4: 0.697671, l5: 0.681294, l6: 0.814501

[epoch:  65/100, batch:   670/  792, ite: 63359] train loss: 4.474514, tar: 0.459374 
l0: 0.351388, l1: 0.352909, l2: 0.350262, l3: 0.347755, l4: 0.380065, l5: 0.477813, l6: 0.633323

[epoch:  65/100, batch:   672/  792, ite: 63360] train loss: 4.473885, tar: 0.459294 
l0: 0.347337, l1: 0.357619, l2: 0.355844, l3: 0.355733, l4: 0.361867, l5: 0.388809, l6: 0.488820

[epoch:  65/100, batch:   674/  792, ite: 63361] train loss: 4.472984, tar: 0.45

[epoch:  65/100, batch:   754/  792, ite: 63401] train loss: 4.470674, tar: 0.458933 
l0: 0.332764, l1: 0.329729, l2: 0.331075, l3: 0.334382, l4: 0.339170, l5: 0.438481, l6: 0.535250

[epoch:  65/100, batch:   756/  792, ite: 63402] train loss: 4.469800, tar: 0.458843 
l0: 0.475719, l1: 0.475275, l2: 0.476029, l3: 0.477861, l4: 0.516534, l5: 0.556490, l6: 0.817144

[epoch:  65/100, batch:   758/  792, ite: 63403] train loss: 4.469987, tar: 0.458855 
l0: 0.452437, l1: 0.457014, l2: 0.453084, l3: 0.462440, l4: 0.501275, l5: 0.618611, l6: 0.791066

[epoch:  65/100, batch:   760/  792, ite: 63404] train loss: 4.470084, tar: 0.458850 
l0: 0.582880, l1: 0.591307, l2: 0.590284, l3: 0.585574, l4: 0.636484, l5: 0.734386, l6: 0.870390

[epoch:  65/100, batch:   762/  792, ite: 63405] train loss: 4.470877, tar: 0.458938 
l0: 0.399451, l1: 0.393046, l2: 0.392874, l3: 0.390920, l4: 0.444877, l5: 0.521466, l6: 0.616105

[epoch:  65/100, batch:   764/  792, ite: 63406] train loss: 4.470438, tar: 0.45

l0: 0.388198, l1: 0.393224, l2: 0.394808, l3: 0.396088, l4: 0.422199, l5: 0.532364, l6: 0.655335

[epoch:  66/100, batch:    52/  792, ite: 63446] train loss: 4.465050, tar: 0.458053 
l0: 0.405264, l1: 0.409795, l2: 0.412370, l3: 0.417056, l4: 0.440521, l5: 0.544050, l6: 0.696583

[epoch:  66/100, batch:    54/  792, ite: 63447] train loss: 4.464861, tar: 0.458017 
l0: 0.216561, l1: 0.217787, l2: 0.219758, l3: 0.228898, l4: 0.245821, l5: 0.419250, l6: 0.552580

[epoch:  66/100, batch:    56/  792, ite: 63448] train loss: 4.463627, tar: 0.457850 
l0: 0.599764, l1: 0.601975, l2: 0.599530, l3: 0.597310, l4: 0.615375, l5: 0.672032, l6: 0.852916

[epoch:  66/100, batch:    58/  792, ite: 63449] train loss: 4.464321, tar: 0.457948 
l0: 0.392918, l1: 0.399029, l2: 0.397578, l3: 0.389291, l4: 0.394180, l5: 0.449829, l6: 0.643876

[epoch:  66/100, batch:    60/  792, ite: 63450] train loss: 4.463813, tar: 0.457903 
l0: 0.324466, l1: 0.327703, l2: 0.329479, l3: 0.342777, l4: 0.407503, l5: 0.5926

[epoch:  66/100, batch:   140/  792, ite: 63490] train loss: 4.472683, tar: 0.459109 
l0: 0.488096, l1: 0.487894, l2: 0.489066, l3: 0.499109, l4: 0.550797, l5: 0.737335, l6: 0.908692

[epoch:  66/100, batch:   142/  792, ite: 63491] train loss: 4.473098, tar: 0.459128 
l0: 0.285285, l1: 0.281774, l2: 0.281238, l3: 0.288848, l4: 0.311084, l5: 0.405654, l6: 0.463168

[epoch:  66/100, batch:   144/  792, ite: 63492] train loss: 4.472009, tar: 0.459012 
l0: 0.447116, l1: 0.451497, l2: 0.452224, l3: 0.454287, l4: 0.470182, l5: 0.576372, l6: 0.814145

[epoch:  66/100, batch:   146/  792, ite: 63493] train loss: 4.472023, tar: 0.459004 
l0: 0.393050, l1: 0.396182, l2: 0.396114, l3: 0.405440, l4: 0.428128, l5: 0.523309, l6: 0.546382

[epoch:  66/100, batch:   148/  792, ite: 63494] train loss: 4.471529, tar: 0.458960 
l0: 0.371088, l1: 0.371418, l2: 0.372640, l3: 0.369757, l4: 0.396175, l5: 0.435045, l6: 0.667248

[epoch:  66/100, batch:   150/  792, ite: 63495] train loss: 4.471089, tar: 0.45

[epoch:  66/100, batch:   230/  792, ite: 63535] train loss: 4.476481, tar: 0.459717 
l0: 0.271860, l1: 0.275701, l2: 0.275654, l3: 0.287730, l4: 0.299385, l5: 0.444508, l6: 0.482939

[epoch:  66/100, batch:   232/  792, ite: 63536] train loss: 4.475421, tar: 0.459595 
l0: 0.446157, l1: 0.439498, l2: 0.439712, l3: 0.437757, l4: 0.460734, l5: 0.534590, l6: 0.589232

[epoch:  66/100, batch:   234/  792, ite: 63537] train loss: 4.475132, tar: 0.459586 
l0: 0.406203, l1: 0.416093, l2: 0.418152, l3: 0.429457, l4: 0.449231, l5: 0.536207, l6: 0.642409

[epoch:  66/100, batch:   236/  792, ite: 63538] train loss: 4.474897, tar: 0.459552 
l0: 0.242411, l1: 0.246625, l2: 0.247360, l3: 0.255482, l4: 0.288467, l5: 0.497478, l6: 0.659489

[epoch:  66/100, batch:   238/  792, ite: 63539] train loss: 4.474037, tar: 0.459410 
l0: 0.377504, l1: 0.378778, l2: 0.379438, l3: 0.378913, l4: 0.413783, l5: 0.493267, l6: 0.663635

[epoch:  66/100, batch:   240/  792, ite: 63540] train loss: 4.473654, tar: 0.45

[epoch:  66/100, batch:   320/  792, ite: 63580] train loss: 4.469061, tar: 0.458611 
l0: 0.525987, l1: 0.524557, l2: 0.527700, l3: 0.543172, l4: 0.536133, l5: 0.590476, l6: 0.681108

[epoch:  66/100, batch:   322/  792, ite: 63581] train loss: 4.469166, tar: 0.458653 
l0: 0.634848, l1: 0.648340, l2: 0.643718, l3: 0.649735, l4: 0.666280, l5: 0.764797, l6: 0.829500

[epoch:  66/100, batch:   324/  792, ite: 63582] train loss: 4.469976, tar: 0.458765 
l0: 0.425003, l1: 0.421774, l2: 0.418958, l3: 0.417843, l4: 0.437314, l5: 0.604014, l6: 0.765612

[epoch:  66/100, batch:   326/  792, ite: 63583] train loss: 4.469869, tar: 0.458743 
l0: 0.344229, l1: 0.340377, l2: 0.341173, l3: 0.346756, l4: 0.347117, l5: 0.439349, l6: 0.601385

[epoch:  66/100, batch:   328/  792, ite: 63584] train loss: 4.469229, tar: 0.458671 
l0: 0.433773, l1: 0.438987, l2: 0.441070, l3: 0.445192, l4: 0.468360, l5: 0.630395, l6: 0.876001

[epoch:  66/100, batch:   330/  792, ite: 63585] train loss: 4.469369, tar: 0.45

[epoch:  66/100, batch:   410/  792, ite: 63625] train loss: 4.468578, tar: 0.458526 
l0: 0.573972, l1: 0.574127, l2: 0.574399, l3: 0.575242, l4: 0.619360, l5: 0.676022, l6: 0.948705

[epoch:  66/100, batch:   412/  792, ite: 63626] train loss: 4.469274, tar: 0.458597 
l0: 0.502117, l1: 0.507429, l2: 0.506686, l3: 0.504065, l4: 0.539608, l5: 0.639363, l6: 0.830657

[epoch:  66/100, batch:   414/  792, ite: 63627] train loss: 4.469549, tar: 0.458624 
l0: 0.366080, l1: 0.373568, l2: 0.373855, l3: 0.381422, l4: 0.390337, l5: 0.417315, l6: 0.734142

[epoch:  66/100, batch:   416/  792, ite: 63628] train loss: 4.469080, tar: 0.458567 
l0: 0.336485, l1: 0.338518, l2: 0.340245, l3: 0.350928, l4: 0.364958, l5: 0.464618, l6: 0.602221

[epoch:  66/100, batch:   418/  792, ite: 63629] train loss: 4.468491, tar: 0.458492 
l0: 0.520030, l1: 0.523929, l2: 0.524056, l3: 0.531203, l4: 0.548315, l5: 0.749852, l6: 0.952972

[epoch:  66/100, batch:   420/  792, ite: 63630] train loss: 4.468988, tar: 0.45

[epoch:  66/100, batch:   500/  792, ite: 63670] train loss: 4.469690, tar: 0.458466 
l0: 0.253651, l1: 0.250663, l2: 0.250707, l3: 0.256523, l4: 0.306621, l5: 0.384376, l6: 0.538761

[epoch:  66/100, batch:   502/  792, ite: 63671] train loss: 4.468685, tar: 0.458344 
l0: 0.468037, l1: 0.473692, l2: 0.470523, l3: 0.473619, l4: 0.482722, l5: 0.577158, l6: 0.764096

[epoch:  66/100, batch:   504/  792, ite: 63672] train loss: 4.468726, tar: 0.458350 
l0: 0.225763, l1: 0.230737, l2: 0.228738, l3: 0.238590, l4: 0.262972, l5: 0.375551, l6: 0.461116

[epoch:  66/100, batch:   506/  792, ite: 63673] train loss: 4.467561, tar: 0.458211 
l0: 0.442746, l1: 0.443765, l2: 0.444377, l3: 0.453722, l4: 0.496124, l5: 0.589566, l6: 0.729739

[epoch:  66/100, batch:   508/  792, ite: 63674] train loss: 4.467551, tar: 0.458201 
l0: 0.597412, l1: 0.596920, l2: 0.599701, l3: 0.599982, l4: 0.613709, l5: 0.673162, l6: 0.834331

[epoch:  66/100, batch:   510/  792, ite: 63675] train loss: 4.468088, tar: 0.45

[epoch:  66/100, batch:   590/  792, ite: 63715] train loss: 4.455850, tar: 0.456662 
l0: 0.349513, l1: 0.356853, l2: 0.359645, l3: 0.364164, l4: 0.398178, l5: 0.407799, l6: 0.596069

[epoch:  66/100, batch:   592/  792, ite: 63716] train loss: 4.455311, tar: 0.456600 
l0: 0.279634, l1: 0.278406, l2: 0.278792, l3: 0.281575, l4: 0.307469, l5: 0.419437, l6: 0.536468

[epoch:  66/100, batch:   594/  792, ite: 63717] train loss: 4.454438, tar: 0.456497 
l0: 0.313141, l1: 0.312717, l2: 0.311384, l3: 0.325306, l4: 0.349712, l5: 0.470897, l6: 0.645618

[epoch:  66/100, batch:   596/  792, ite: 63718] train loss: 4.453905, tar: 0.456414 
l0: 0.496023, l1: 0.494204, l2: 0.500980, l3: 0.522298, l4: 0.532813, l5: 0.645388, l6: 0.747029

[epoch:  66/100, batch:   598/  792, ite: 63719] train loss: 4.454076, tar: 0.456437 
l0: 0.445562, l1: 0.447022, l2: 0.445656, l3: 0.443140, l4: 0.488680, l5: 0.568022, l6: 0.763942

[epoch:  66/100, batch:   600/  792, ite: 63720] train loss: 4.454037, tar: 0.45

[epoch:  66/100, batch:   680/  792, ite: 63760] train loss: 4.446722, tar: 0.455521 
l0: 0.289604, l1: 0.292400, l2: 0.291889, l3: 0.299186, l4: 0.319731, l5: 0.414641, l6: 0.596076

[epoch:  66/100, batch:   682/  792, ite: 63761] train loss: 4.446009, tar: 0.455426 
l0: 0.597167, l1: 0.609147, l2: 0.614459, l3: 0.617799, l4: 0.637641, l5: 0.788635, l6: 0.933809

[epoch:  66/100, batch:   684/  792, ite: 63762] train loss: 4.446758, tar: 0.455507 
l0: 0.308749, l1: 0.309938, l2: 0.313373, l3: 0.316022, l4: 0.357047, l5: 0.528071, l6: 0.731784

[epoch:  66/100, batch:   686/  792, ite: 63763] train loss: 4.446340, tar: 0.455423 
l0: 0.461563, l1: 0.464693, l2: 0.467848, l3: 0.473499, l4: 0.512211, l5: 0.590348, l6: 0.614794

[epoch:  66/100, batch:   688/  792, ite: 63764] train loss: 4.446210, tar: 0.455427 
l0: 0.475083, l1: 0.464763, l2: 0.461746, l3: 0.458561, l4: 0.432513, l5: 0.527508, l6: 0.647195

[epoch:  66/100, batch:   690/  792, ite: 63765] train loss: 4.446060, tar: 0.45

[epoch:  66/100, batch:   770/  792, ite: 63805] train loss: 4.451984, tar: 0.456144 
l0: 0.324804, l1: 0.330129, l2: 0.331394, l3: 0.341277, l4: 0.357956, l5: 0.431142, l6: 0.603142

[epoch:  66/100, batch:   772/  792, ite: 63806] train loss: 4.451389, tar: 0.456071 
l0: 0.625139, l1: 0.626632, l2: 0.627546, l3: 0.629582, l4: 0.641152, l5: 0.670136, l6: 0.743953

[epoch:  66/100, batch:   774/  792, ite: 63807] train loss: 4.451899, tar: 0.456165 
l0: 0.301013, l1: 0.305528, l2: 0.304489, l3: 0.309775, l4: 0.336453, l5: 0.432835, l6: 0.616210

[epoch:  66/100, batch:   776/  792, ite: 63808] train loss: 4.451251, tar: 0.456079 
l0: 0.350821, l1: 0.358403, l2: 0.354486, l3: 0.349693, l4: 0.356174, l5: 0.427046, l6: 0.585609

[epoch:  66/100, batch:   778/  792, ite: 63809] train loss: 4.450734, tar: 0.456021 
l0: 0.373804, l1: 0.369973, l2: 0.370485, l3: 0.371361, l4: 0.395419, l5: 0.539316, l6: 0.659020

[epoch:  66/100, batch:   780/  792, ite: 63810] train loss: 4.450403, tar: 0.45

l0: 0.607773, l1: 0.607969, l2: 0.605065, l3: 0.612678, l4: 0.618942, l5: 0.676414, l6: 0.895832

[epoch:  67/100, batch:    68/  792, ite: 63850] train loss: 4.451160, tar: 0.456029 
l0: 0.401600, l1: 0.413919, l2: 0.412239, l3: 0.424580, l4: 0.465508, l5: 0.609305, l6: 0.782348

[epoch:  67/100, batch:    70/  792, ite: 63851] train loss: 4.451176, tar: 0.456000 
l0: 0.486410, l1: 0.488554, l2: 0.486394, l3: 0.483329, l4: 0.514405, l5: 0.603689, l6: 0.700704

[epoch:  67/100, batch:    72/  792, ite: 63852] train loss: 4.451287, tar: 0.456016 
l0: 0.364639, l1: 0.370941, l2: 0.368607, l3: 0.374366, l4: 0.384398, l5: 0.400634, l6: 0.458343

[epoch:  67/100, batch:    74/  792, ite: 63853] train loss: 4.450638, tar: 0.455967 
l0: 0.419995, l1: 0.416404, l2: 0.414908, l3: 0.420846, l4: 0.452290, l5: 0.565664, l6: 0.727082

[epoch:  67/100, batch:    76/  792, ite: 63854] train loss: 4.450510, tar: 0.455948 
l0: 0.311596, l1: 0.316845, l2: 0.318112, l3: 0.323720, l4: 0.344595, l5: 0.4922

[epoch:  67/100, batch:   156/  792, ite: 63894] train loss: 4.451739, tar: 0.455905 
l0: 0.796876, l1: 0.806553, l2: 0.800653, l3: 0.798378, l4: 0.817635, l5: 0.914357, l6: 0.874485

[epoch:  67/100, batch:   158/  792, ite: 63895] train loss: 4.452974, tar: 0.456085 
l0: 0.481510, l1: 0.477159, l2: 0.478785, l3: 0.483729, l4: 0.492957, l5: 0.566503, l6: 0.766064

[epoch:  67/100, batch:   160/  792, ite: 63896] train loss: 4.453011, tar: 0.456098 
l0: 0.537139, l1: 0.538063, l2: 0.538637, l3: 0.540322, l4: 0.570053, l5: 0.646779, l6: 0.824238

[epoch:  67/100, batch:   162/  792, ite: 63897] train loss: 4.453315, tar: 0.456141 
l0: 0.429880, l1: 0.429107, l2: 0.425213, l3: 0.422751, l4: 0.417935, l5: 0.510684, l6: 0.681730

[epoch:  67/100, batch:   164/  792, ite: 63898] train loss: 4.453067, tar: 0.456127 
l0: 0.200173, l1: 0.205081, l2: 0.206165, l3: 0.213773, l4: 0.243666, l5: 0.324588, l6: 0.468055

[epoch:  67/100, batch:   166/  792, ite: 63899] train loss: 4.451960, tar: 0.45

[epoch:  67/100, batch:   246/  792, ite: 63939] train loss: 4.447094, tar: 0.455343 
l0: 0.281371, l1: 0.278041, l2: 0.278301, l3: 0.270683, l4: 0.297149, l5: 0.464983, l6: 0.578861

[epoch:  67/100, batch:   248/  792, ite: 63940] train loss: 4.446390, tar: 0.455254 
l0: 0.301021, l1: 0.303922, l2: 0.306724, l3: 0.318707, l4: 0.362034, l5: 0.436570, l6: 0.553874

[epoch:  67/100, batch:   250/  792, ite: 63941] train loss: 4.445762, tar: 0.455174 
l0: 0.551575, l1: 0.544858, l2: 0.544346, l3: 0.541362, l4: 0.558221, l5: 0.582651, l6: 0.696092

[epoch:  67/100, batch:   252/  792, ite: 63942] train loss: 4.445959, tar: 0.455224 
l0: 0.344403, l1: 0.341462, l2: 0.342488, l3: 0.346400, l4: 0.395745, l5: 0.520243, l6: 0.711885

[epoch:  67/100, batch:   254/  792, ite: 63943] train loss: 4.445590, tar: 0.455167 
l0: 0.747747, l1: 0.750796, l2: 0.748494, l3: 0.767670, l4: 0.773793, l5: 0.852603, l6: 0.986582

[epoch:  67/100, batch:   256/  792, ite: 63944] train loss: 4.446821, tar: 0.45

[epoch:  67/100, batch:   336/  792, ite: 63984] train loss: 4.448910, tar: 0.455673 
l0: 0.560364, l1: 0.563031, l2: 0.562656, l3: 0.570462, l4: 0.586532, l5: 0.637456, l6: 0.894717

[epoch:  67/100, batch:   338/  792, ite: 63985] train loss: 4.449391, tar: 0.455726 
l0: 0.544188, l1: 0.537636, l2: 0.535007, l3: 0.543515, l4: 0.535161, l5: 0.636013, l6: 0.791692

[epoch:  67/100, batch:   340/  792, ite: 63986] train loss: 4.449636, tar: 0.455770 
l0: 0.212413, l1: 0.215410, l2: 0.214773, l3: 0.216443, l4: 0.253106, l5: 0.398427, l6: 0.566210

[epoch:  67/100, batch:   342/  792, ite: 63987] train loss: 4.448757, tar: 0.455648 
l0: 0.368115, l1: 0.372758, l2: 0.386766, l3: 0.395681, l4: 0.406001, l5: 0.555858, l6: 0.670183

[epoch:  67/100, batch:   344/  792, ite: 63988] train loss: 4.448491, tar: 0.455604 
l0: 0.732293, l1: 0.733100, l2: 0.732857, l3: 0.732704, l4: 0.753796, l5: 0.765970, l6: 0.833670

[epoch:  67/100, batch:   346/  792, ite: 63989] train loss: 4.449353, tar: 0.45

[epoch:  67/100, batch:   426/  792, ite: 64029] train loss: 4.988456, tar: 0.527095 
l0: 0.394599, l1: 0.397452, l2: 0.398672, l3: 0.399553, l4: 0.425517, l5: 0.428672, l6: 0.583914

[epoch:  67/100, batch:   428/  792, ite: 64030] train loss: 4.943876, tar: 0.522679 
l0: 0.321443, l1: 0.321032, l2: 0.321493, l3: 0.323187, l4: 0.346154, l5: 0.481219, l6: 0.629314

[epoch:  67/100, batch:   430/  792, ite: 64031] train loss: 4.894849, tar: 0.516187 
l0: 0.554696, l1: 0.568854, l2: 0.568127, l3: 0.576651, l4: 0.613870, l5: 0.708631, l6: 0.862038

[epoch:  67/100, batch:   432/  792, ite: 64032] train loss: 4.906889, tar: 0.517391 
l0: 0.535917, l1: 0.541615, l2: 0.536437, l3: 0.541602, l4: 0.593854, l5: 0.629089, l6: 1.028444

[epoch:  67/100, batch:   434/  792, ite: 64033] train loss: 4.926407, tar: 0.517952 
l0: 0.247051, l1: 0.250234, l2: 0.253263, l3: 0.267030, l4: 0.310995, l5: 0.419327, l6: 0.603799

[epoch:  67/100, batch:   436/  792, ite: 64034] train loss: 4.869857, tar: 0.50

[epoch:  67/100, batch:   516/  792, ite: 64074] train loss: 4.556514, tar: 0.468805 
l0: 0.853124, l1: 0.825849, l2: 0.803708, l3: 0.769640, l4: 0.774389, l5: 0.844339, l6: 0.890399

[epoch:  67/100, batch:   518/  792, ite: 64075] train loss: 4.585937, tar: 0.473929 
l0: 0.472678, l1: 0.473254, l2: 0.473286, l3: 0.486089, l4: 0.512210, l5: 0.601670, l6: 0.877393

[epoch:  67/100, batch:   520/  792, ite: 64076] train loss: 4.588780, tar: 0.473913 
l0: 0.315670, l1: 0.322319, l2: 0.321614, l3: 0.331377, l4: 0.389338, l5: 0.504836, l6: 0.683772

[epoch:  67/100, batch:   522/  792, ite: 64077] train loss: 4.575961, tar: 0.471858 
l0: 0.229842, l1: 0.232046, l2: 0.233557, l3: 0.243007, l4: 0.304908, l5: 0.388693, l6: 0.514029

[epoch:  67/100, batch:   524/  792, ite: 64078] train loss: 4.552006, tar: 0.468755 
l0: 0.318596, l1: 0.319453, l2: 0.321550, l3: 0.327611, l4: 0.359827, l5: 0.484572, l6: 0.522813

[epoch:  67/100, batch:   526/  792, ite: 64079] train loss: 4.535528, tar: 0.46

[epoch:  67/100, batch:   606/  792, ite: 64119] train loss: 4.566397, tar: 0.469183 
l0: 0.537706, l1: 0.539727, l2: 0.538549, l3: 0.539691, l4: 0.557745, l5: 0.627818, l6: 0.849077

[epoch:  67/100, batch:   608/  792, ite: 64120] train loss: 4.570495, tar: 0.469754 
l0: 0.296267, l1: 0.300682, l2: 0.301283, l3: 0.303907, l4: 0.334290, l5: 0.412297, l6: 0.570848

[epoch:  67/100, batch:   610/  792, ite: 64121] train loss: 4.558646, tar: 0.468320 
l0: 0.414303, l1: 0.416953, l2: 0.415623, l3: 0.421691, l4: 0.455784, l5: 0.615083, l6: 0.824992

[epoch:  67/100, batch:   612/  792, ite: 64122] train loss: 4.558028, tar: 0.467878 
l0: 0.268052, l1: 0.268888, l2: 0.268873, l3: 0.269545, l4: 0.301915, l5: 0.370136, l6: 0.603266

[epoch:  67/100, batch:   614/  792, ite: 64123] train loss: 4.545484, tar: 0.466253 
l0: 0.387309, l1: 0.391471, l2: 0.392081, l3: 0.394154, l4: 0.420992, l5: 0.527567, l6: 0.655375

[epoch:  67/100, batch:   616/  792, ite: 64124] train loss: 4.540162, tar: 0.46

[epoch:  67/100, batch:   696/  792, ite: 64164] train loss: 4.526478, tar: 0.464340 
l0: 0.282815, l1: 0.274843, l2: 0.277792, l3: 0.279026, l4: 0.289382, l5: 0.376728, l6: 0.541061

[epoch:  67/100, batch:   698/  792, ite: 64165] train loss: 4.516839, tar: 0.463240 
l0: 0.497429, l1: 0.491259, l2: 0.492002, l3: 0.498724, l4: 0.513436, l5: 0.614387, l6: 0.802492

[epoch:  67/100, batch:   700/  792, ite: 64166] train loss: 4.518039, tar: 0.463446 
l0: 0.303715, l1: 0.300586, l2: 0.295445, l3: 0.291677, l4: 0.332178, l5: 0.415049, l6: 0.623866

[epoch:  67/100, batch:   702/  792, ite: 64167] train loss: 4.509966, tar: 0.462490 
l0: 0.378694, l1: 0.382028, l2: 0.384171, l3: 0.391149, l4: 0.431380, l5: 0.532382, l6: 0.709589

[epoch:  67/100, batch:   704/  792, ite: 64168] train loss: 4.507003, tar: 0.461991 
l0: 0.476099, l1: 0.480588, l2: 0.479789, l3: 0.475898, l4: 0.515333, l5: 0.623679, l6: 0.770304

[epoch:  67/100, batch:   706/  792, ite: 64169] train loss: 4.507395, tar: 0.46

[epoch:  67/100, batch:   786/  792, ite: 64209] train loss: 4.472842, tar: 0.458356 
l0: 0.355525, l1: 0.359226, l2: 0.357880, l3: 0.366241, l4: 0.396551, l5: 0.537791, l6: 0.657086

[epoch:  67/100, batch:   788/  792, ite: 64210] train loss: 4.469122, tar: 0.457867 
l0: 0.552106, l1: 0.558298, l2: 0.557753, l3: 0.559051, l4: 0.578604, l5: 0.618857, l6: 0.731245

[epoch:  67/100, batch:   790/  792, ite: 64211] train loss: 4.470788, tar: 0.458313 
l0: 0.479361, l1: 0.482684, l2: 0.479545, l3: 0.482729, l4: 0.485769, l5: 0.487298, l6: 0.604163

[epoch:  67/100, batch:   792/  792, ite: 64212] train loss: 4.469319, tar: 0.458413 
Starting epoch 68
Epoch 68 loading complete
l0: 0.414540, l1: 0.419759, l2: 0.418019, l3: 0.419115, l4: 0.436448, l5: 0.528736, l6: 0.636038

[epoch:  68/100, batch:     2/  792, ite: 64213] train loss: 4.466884, tar: 0.458207 
l0: 0.395673, l1: 0.397950, l2: 0.400206, l3: 0.408327, l4: 0.438270, l5: 0.538783, l6: 0.763472

[epoch:  68/100, batch:     4/  792,

l0: 0.727873, l1: 0.731817, l2: 0.729676, l3: 0.729128, l4: 0.788838, l5: 0.772369, l6: 0.916762

[epoch:  68/100, batch:    84/  792, ite: 64254] train loss: 4.452314, tar: 0.455786 
l0: 0.187215, l1: 0.192286, l2: 0.191866, l3: 0.192763, l4: 0.219594, l5: 0.289067, l6: 0.472914

[epoch:  68/100, batch:    86/  792, ite: 64255] train loss: 4.443628, tar: 0.454733 
l0: 0.454501, l1: 0.458164, l2: 0.462092, l3: 0.462901, l4: 0.506289, l5: 0.671462, l6: 0.975962

[epoch:  68/100, batch:    88/  792, ite: 64256] train loss: 4.445161, tar: 0.454732 
l0: 0.245393, l1: 0.243186, l2: 0.235244, l3: 0.236574, l4: 0.276003, l5: 0.374645, l6: 0.535878

[epoch:  68/100, batch:    90/  792, ite: 64257] train loss: 4.438383, tar: 0.453917 
l0: 0.286736, l1: 0.288522, l2: 0.288177, l3: 0.292518, l4: 0.323620, l5: 0.370182, l6: 0.497068

[epoch:  68/100, batch:    92/  792, ite: 64258] train loss: 4.432091, tar: 0.453269 
l0: 0.484840, l1: 0.488102, l2: 0.488862, l3: 0.486324, l4: 0.483934, l5: 0.5312

[epoch:  68/100, batch:   172/  792, ite: 64298] train loss: 4.454694, tar: 0.455588 
l0: 0.182107, l1: 0.190408, l2: 0.192024, l3: 0.204013, l4: 0.269531, l5: 0.351404, l6: 0.521982

[epoch:  68/100, batch:   174/  792, ite: 64299] train loss: 4.447904, tar: 0.454673 
l0: 0.412423, l1: 0.417566, l2: 0.418786, l3: 0.417658, l4: 0.444683, l5: 0.558911, l6: 0.762342

[epoch:  68/100, batch:   176/  792, ite: 64300] train loss: 4.447286, tar: 0.454533 
l0: 0.341854, l1: 0.343498, l2: 0.338390, l3: 0.338638, l4: 0.348230, l5: 0.437894, l6: 0.555966

[epoch:  68/100, batch:   178/  792, ite: 64301] train loss: 4.443355, tar: 0.454158 
l0: 0.592415, l1: 0.600535, l2: 0.600916, l3: 0.595035, l4: 0.608931, l5: 0.670647, l6: 0.902923

[epoch:  68/100, batch:   180/  792, ite: 64302] train loss: 4.447025, tar: 0.454616 
l0: 0.337936, l1: 0.336939, l2: 0.338041, l3: 0.335316, l4: 0.331162, l5: 0.448991, l6: 0.625848

[epoch:  68/100, batch:   182/  792, ite: 64303] train loss: 4.443869, tar: 0.45

[epoch:  68/100, batch:   262/  792, ite: 64343] train loss: 4.439549, tar: 0.454689 
l0: 0.429768, l1: 0.427843, l2: 0.429058, l3: 0.440512, l4: 0.463084, l5: 0.572876, l6: 0.834144

[epoch:  68/100, batch:   264/  792, ite: 64344] train loss: 4.439622, tar: 0.454616 
l0: 0.576356, l1: 0.570185, l2: 0.565351, l3: 0.565190, l4: 0.557795, l5: 0.695106, l6: 0.886578

[epoch:  68/100, batch:   266/  792, ite: 64345] train loss: 4.442537, tar: 0.454969 
l0: 0.206883, l1: 0.216889, l2: 0.217453, l3: 0.214795, l4: 0.235512, l5: 0.404167, l6: 0.575278

[epoch:  68/100, batch:   268/  792, ite: 64346] train loss: 4.437442, tar: 0.454252 
l0: 0.330512, l1: 0.333369, l2: 0.332185, l3: 0.337445, l4: 0.359281, l5: 0.475520, l6: 0.563673

[epoch:  68/100, batch:   270/  792, ite: 64347] train loss: 4.434467, tar: 0.453896 
l0: 0.377097, l1: 0.380810, l2: 0.380091, l3: 0.380613, l4: 0.396378, l5: 0.445408, l6: 0.533065

[epoch:  68/100, batch:   272/  792, ite: 64348] train loss: 4.431658, tar: 0.45

[epoch:  68/100, batch:   352/  792, ite: 64388] train loss: 4.468434, tar: 0.458204 
l0: 0.283923, l1: 0.284081, l2: 0.282553, l3: 0.289189, l4: 0.304504, l5: 0.424845, l6: 0.549580

[epoch:  68/100, batch:   354/  792, ite: 64389] train loss: 4.464711, tar: 0.457756 
l0: 0.398167, l1: 0.402090, l2: 0.402754, l3: 0.419939, l4: 0.466727, l5: 0.573385, l6: 0.653206

[epoch:  68/100, batch:   356/  792, ite: 64390] train loss: 4.463451, tar: 0.457603 
l0: 0.581443, l1: 0.578902, l2: 0.577602, l3: 0.588984, l4: 0.624405, l5: 0.662183, l6: 0.853173

[epoch:  68/100, batch:   358/  792, ite: 64391] train loss: 4.465818, tar: 0.457920 
l0: 0.316686, l1: 0.319898, l2: 0.320255, l3: 0.323478, l4: 0.361822, l5: 0.441922, l6: 0.576758

[epoch:  68/100, batch:   360/  792, ite: 64392] train loss: 4.462633, tar: 0.457559 
l0: 0.449039, l1: 0.448677, l2: 0.444628, l3: 0.443089, l4: 0.480410, l5: 0.588106, l6: 0.782624

[epoch:  68/100, batch:   362/  792, ite: 64393] train loss: 4.462672, tar: 0.45

[epoch:  68/100, batch:   442/  792, ite: 64433] train loss: 4.440369, tar: 0.454439 
l0: 0.300319, l1: 0.297327, l2: 0.297863, l3: 0.299720, l4: 0.305267, l5: 0.405831, l6: 0.598073

[epoch:  68/100, batch:   444/  792, ite: 64434] train loss: 4.437358, tar: 0.454084 
l0: 0.806189, l1: 0.808711, l2: 0.819770, l3: 0.824984, l4: 0.890636, l5: 1.005189, l6: 1.179152

[epoch:  68/100, batch:   446/  792, ite: 64435] train loss: 4.444603, tar: 0.454894 
l0: 0.307809, l1: 0.306732, l2: 0.306072, l3: 0.316953, l4: 0.350713, l5: 0.471265, l6: 0.562368

[epoch:  68/100, batch:   448/  792, ite: 64436] train loss: 4.441804, tar: 0.454556 
l0: 0.382840, l1: 0.387213, l2: 0.387714, l3: 0.394571, l4: 0.402130, l5: 0.452591, l6: 0.624092

[epoch:  68/100, batch:   450/  792, ite: 64437] train loss: 4.440476, tar: 0.454392 
l0: 1.091915, l1: 1.087240, l2: 1.088589, l3: 1.105452, l4: 1.123659, l5: 1.074264, l6: 1.220135

[epoch:  68/100, batch:   452/  792, ite: 64438] train loss: 4.450666, tar: 0.45

[epoch:  68/100, batch:   532/  792, ite: 64478] train loss: 4.446517, tar: 0.455271 
l0: 0.320506, l1: 0.325458, l2: 0.324305, l3: 0.328946, l4: 0.326627, l5: 0.472195, l6: 0.639685

[epoch:  68/100, batch:   534/  792, ite: 64479] train loss: 4.444268, tar: 0.454990 
l0: 0.312820, l1: 0.316274, l2: 0.315301, l3: 0.316529, l4: 0.346319, l5: 0.451998, l6: 0.641414

[epoch:  68/100, batch:   536/  792, ite: 64480] train loss: 4.442200, tar: 0.454694 
l0: 0.473085, l1: 0.472391, l2: 0.471886, l3: 0.482938, l4: 0.515227, l5: 0.759800, l6: 1.032196

[epoch:  68/100, batch:   538/  792, ite: 64481] train loss: 4.444071, tar: 0.454732 
l0: 0.530401, l1: 0.541331, l2: 0.542242, l3: 0.544009, l4: 0.608002, l5: 0.673817, l6: 0.860718

[epoch:  68/100, batch:   540/  792, ite: 64482] train loss: 4.445630, tar: 0.454889 
l0: 0.274944, l1: 0.275252, l2: 0.266311, l3: 0.266329, l4: 0.298057, l5: 0.400341, l6: 0.495083

[epoch:  68/100, batch:   542/  792, ite: 64483] train loss: 4.442422, tar: 0.45

[epoch:  68/100, batch:   622/  792, ite: 64523] train loss: 4.432708, tar: 0.453045 
l0: 0.438327, l1: 0.431985, l2: 0.434031, l3: 0.443493, l4: 0.442196, l5: 0.507880, l6: 0.616926

[epoch:  68/100, batch:   624/  792, ite: 64524] train loss: 4.431886, tar: 0.453016 
l0: 0.687745, l1: 0.688660, l2: 0.686071, l3: 0.691060, l4: 0.717040, l5: 0.789313, l6: 0.993800

[epoch:  68/100, batch:   626/  792, ite: 64525] train loss: 4.435539, tar: 0.453464 
l0: 0.421077, l1: 0.421247, l2: 0.420415, l3: 0.423913, l4: 0.458517, l5: 0.578629, l6: 0.761647

[epoch:  68/100, batch:   628/  792, ite: 64526] train loss: 4.435433, tar: 0.453402 
l0: 0.564559, l1: 0.564023, l2: 0.562698, l3: 0.565284, l4: 0.587767, l5: 0.670820, l6: 0.724867

[epoch:  68/100, batch:   630/  792, ite: 64527] train loss: 4.436475, tar: 0.453613 
l0: 0.272782, l1: 0.273238, l2: 0.274428, l3: 0.276547, l4: 0.301960, l5: 0.428342, l6: 0.564295

[epoch:  68/100, batch:   632/  792, ite: 64528] train loss: 4.433850, tar: 0.45

[epoch:  68/100, batch:   712/  792, ite: 64568] train loss: 4.465032, tar: 0.457150 
l0: 0.396107, l1: 0.396812, l2: 0.396122, l3: 0.405965, l4: 0.432626, l5: 0.512255, l6: 0.673070

[epoch:  68/100, batch:   714/  792, ite: 64569] train loss: 4.464065, tar: 0.457042 
l0: 0.373650, l1: 0.376007, l2: 0.378974, l3: 0.387924, l4: 0.415880, l5: 0.545845, l6: 0.678435

[epoch:  68/100, batch:   716/  792, ite: 64570] train loss: 4.463314, tar: 0.456896 
l0: 0.433408, l1: 0.441832, l2: 0.440497, l3: 0.433387, l4: 0.453176, l5: 0.457034, l6: 0.557588

[epoch:  68/100, batch:   718/  792, ite: 64571] train loss: 4.462383, tar: 0.456855 
l0: 0.508568, l1: 0.506129, l2: 0.507158, l3: 0.504328, l4: 0.514324, l5: 0.565323, l6: 0.761527

[epoch:  68/100, batch:   720/  792, ite: 64572] train loss: 4.462821, tar: 0.456945 
l0: 0.383179, l1: 0.387652, l2: 0.384272, l3: 0.388038, l4: 0.418206, l5: 0.541403, l6: 0.847781

[epoch:  68/100, batch:   722/  792, ite: 64573] train loss: 4.462581, tar: 0.45

l0: 0.334832, l1: 0.334973, l2: 0.337843, l3: 0.348972, l4: 0.380898, l5: 0.461435, l6: 0.606722

[epoch:  69/100, batch:    10/  792, ite: 64613] train loss: 4.445848, tar: 0.454525 
l0: 0.235347, l1: 0.237346, l2: 0.237255, l3: 0.238090, l4: 0.255886, l5: 0.300856, l6: 0.377645

[epoch:  69/100, batch:    12/  792, ite: 64614] train loss: 4.442364, tar: 0.454168 
l0: 0.267930, l1: 0.274455, l2: 0.274062, l3: 0.273708, l4: 0.315854, l5: 0.437765, l6: 0.532002

[epoch:  69/100, batch:    14/  792, ite: 64615] train loss: 4.440019, tar: 0.453865 
l0: 0.375991, l1: 0.389785, l2: 0.388438, l3: 0.399755, l4: 0.440807, l5: 0.718624, l6: 0.855000

[epoch:  69/100, batch:    16/  792, ite: 64616] train loss: 4.440234, tar: 0.453739 
l0: 0.284854, l1: 0.284882, l2: 0.284479, l3: 0.292194, l4: 0.313991, l5: 0.406443, l6: 0.515911

[epoch:  69/100, batch:    18/  792, ite: 64617] train loss: 4.437908, tar: 0.453465 
l0: 0.491553, l1: 0.491792, l2: 0.493127, l3: 0.496356, l4: 0.484252, l5: 0.5743

[epoch:  69/100, batch:    98/  792, ite: 64657] train loss: 4.448967, tar: 0.454730 
l0: 0.225273, l1: 0.229604, l2: 0.231201, l3: 0.240420, l4: 0.257137, l5: 0.361895, l6: 0.512482

[epoch:  69/100, batch:   100/  792, ite: 64658] train loss: 4.446244, tar: 0.454381 
l0: 0.225722, l1: 0.224611, l2: 0.223750, l3: 0.230068, l4: 0.271056, l5: 0.437776, l6: 0.514629

[epoch:  69/100, batch:   102/  792, ite: 64659] train loss: 4.443575, tar: 0.454034 
l0: 0.530166, l1: 0.537036, l2: 0.534801, l3: 0.533719, l4: 0.536391, l5: 0.622142, l6: 0.798169

[epoch:  69/100, batch:   104/  792, ite: 64660] train loss: 4.444351, tar: 0.454149 
l0: 0.434686, l1: 0.439725, l2: 0.435913, l3: 0.438376, l4: 0.464725, l5: 0.584263, l6: 0.779297

[epoch:  69/100, batch:   106/  792, ite: 64661] train loss: 4.444236, tar: 0.454120 
l0: 0.756939, l1: 0.767531, l2: 0.768501, l3: 0.759891, l4: 0.786818, l5: 0.871962, l6: 1.026971

[epoch:  69/100, batch:   108/  792, ite: 64662] train loss: 4.447715, tar: 0.45

[epoch:  69/100, batch:   188/  792, ite: 64702] train loss: 4.472816, tar: 0.457356 
l0: 0.425864, l1: 0.423783, l2: 0.426837, l3: 0.434953, l4: 0.451023, l5: 0.526870, l6: 0.731134

[epoch:  69/100, batch:   190/  792, ite: 64703] train loss: 4.472412, tar: 0.457311 
l0: 0.357991, l1: 0.362552, l2: 0.357646, l3: 0.361405, l4: 0.363511, l5: 0.518043, l6: 0.632400

[epoch:  69/100, batch:   192/  792, ite: 64704] train loss: 4.471087, tar: 0.457170 
l0: 0.429017, l1: 0.427859, l2: 0.427848, l3: 0.430551, l4: 0.451684, l5: 0.502676, l6: 0.602625

[epoch:  69/100, batch:   194/  792, ite: 64705] train loss: 4.470305, tar: 0.457130 
l0: 0.661500, l1: 0.666207, l2: 0.665315, l3: 0.661282, l4: 0.684565, l5: 0.684712, l6: 0.831693

[epoch:  69/100, batch:   196/  792, ite: 64706] train loss: 4.472103, tar: 0.457419 
l0: 0.439642, l1: 0.437954, l2: 0.440429, l3: 0.447547, l4: 0.449213, l5: 0.565162, l6: 0.772856

[epoch:  69/100, batch:   198/  792, ite: 64707] train loss: 4.471945, tar: 0.45

[epoch:  69/100, batch:   278/  792, ite: 64747] train loss: 4.462621, tar: 0.456486 
l0: 0.787626, l1: 0.787742, l2: 0.791323, l3: 0.794533, l4: 0.818689, l5: 0.844798, l6: 0.942452

[epoch:  69/100, batch:   280/  792, ite: 64748] train loss: 4.465685, tar: 0.456929 
l0: 0.291026, l1: 0.295537, l2: 0.296736, l3: 0.307044, l4: 0.323876, l5: 0.407795, l6: 0.493496

[epoch:  69/100, batch:   282/  792, ite: 64749] train loss: 4.463778, tar: 0.456707 
l0: 0.271592, l1: 0.273632, l2: 0.277309, l3: 0.287908, l4: 0.319037, l5: 0.537198, l6: 0.850271

[epoch:  69/100, batch:   284/  792, ite: 64750] train loss: 4.462710, tar: 0.456461 
l0: 0.233456, l1: 0.236458, l2: 0.235952, l3: 0.241987, l4: 0.286938, l5: 0.391590, l6: 0.528120

[epoch:  69/100, batch:   286/  792, ite: 64751] train loss: 4.460430, tar: 0.456164 
l0: 0.234979, l1: 0.238162, l2: 0.241452, l3: 0.243434, l4: 0.281159, l5: 0.335367, l6: 0.498394

[epoch:  69/100, batch:   288/  792, ite: 64752] train loss: 4.458028, tar: 0.45

[epoch:  69/100, batch:   368/  792, ite: 64792] train loss: 4.454377, tar: 0.455803 
l0: 0.318990, l1: 0.314255, l2: 0.313476, l3: 0.318851, l4: 0.321876, l5: 0.355128, l6: 0.383633

[epoch:  69/100, batch:   370/  792, ite: 64793] train loss: 4.452245, tar: 0.455631 
l0: 0.256001, l1: 0.261664, l2: 0.262403, l3: 0.272297, l4: 0.271534, l5: 0.387986, l6: 0.553365

[epoch:  69/100, batch:   372/  792, ite: 64794] train loss: 4.450176, tar: 0.455379 
l0: 0.300904, l1: 0.306188, l2: 0.307680, l3: 0.316803, l4: 0.358652, l5: 0.453717, l6: 0.626487

[epoch:  69/100, batch:   374/  792, ite: 64795] train loss: 4.448856, tar: 0.455185 
l0: 0.638735, l1: 0.641016, l2: 0.641988, l3: 0.643158, l4: 0.675899, l5: 0.744461, l6: 0.984727

[epoch:  69/100, batch:   376/  792, ite: 64796] train loss: 4.451044, tar: 0.455416 
l0: 0.531207, l1: 0.525672, l2: 0.522457, l3: 0.524272, l4: 0.558073, l5: 0.627409, l6: 0.835063

[epoch:  69/100, batch:   378/  792, ite: 64797] train loss: 4.451689, tar: 0.45

[epoch:  69/100, batch:   458/  792, ite: 64837] train loss: 4.443974, tar: 0.455002 
l0: 0.538192, l1: 0.539764, l2: 0.537271, l3: 0.542080, l4: 0.534049, l5: 0.612365, l6: 0.741035

[epoch:  69/100, batch:   460/  792, ite: 64838] train loss: 4.444415, tar: 0.455102 
l0: 0.672745, l1: 0.668673, l2: 0.666510, l3: 0.673357, l4: 0.691069, l5: 0.811563, l6: 0.881329

[epoch:  69/100, batch:   462/  792, ite: 64839] train loss: 4.446228, tar: 0.455361 
l0: 0.366066, l1: 0.368932, l2: 0.369147, l3: 0.370009, l4: 0.394139, l5: 0.490317, l6: 0.584791

[epoch:  69/100, batch:   464/  792, ite: 64840] train loss: 4.445169, tar: 0.455255 
l0: 0.517084, l1: 0.517158, l2: 0.514709, l3: 0.520867, l4: 0.557146, l5: 0.665568, l6: 0.799594

[epoch:  69/100, batch:   466/  792, ite: 64841] train loss: 4.445790, tar: 0.455328 
l0: 0.419490, l1: 0.426469, l2: 0.428402, l3: 0.432622, l4: 0.440578, l5: 0.601405, l6: 0.745336

[epoch:  69/100, batch:   468/  792, ite: 64842] train loss: 4.445752, tar: 0.45

[epoch:  69/100, batch:   548/  792, ite: 64882] train loss: 4.430548, tar: 0.452997 
l0: 0.748539, l1: 0.759634, l2: 0.766708, l3: 0.774249, l4: 0.796110, l5: 0.870388, l6: 0.910408

[epoch:  69/100, batch:   550/  792, ite: 64883] train loss: 4.433083, tar: 0.453331 
l0: 0.459585, l1: 0.470085, l2: 0.468369, l3: 0.469583, l4: 0.494527, l5: 0.557281, l6: 0.658046

[epoch:  69/100, batch:   552/  792, ite: 64884] train loss: 4.432867, tar: 0.453338 
l0: 0.325319, l1: 0.332653, l2: 0.330967, l3: 0.324967, l4: 0.339184, l5: 0.362567, l6: 0.457209

[epoch:  69/100, batch:   554/  792, ite: 64885] train loss: 4.431237, tar: 0.453194 
l0: 0.706446, l1: 0.716635, l2: 0.710921, l3: 0.722447, l4: 0.749218, l5: 0.713754, l6: 0.796096

[epoch:  69/100, batch:   556/  792, ite: 64886] train loss: 4.433098, tar: 0.453480 
l0: 0.266954, l1: 0.268455, l2: 0.269515, l3: 0.269984, l4: 0.306992, l5: 0.376437, l6: 0.530847

[epoch:  69/100, batch:   558/  792, ite: 64887] train loss: 4.431274, tar: 0.45

[epoch:  69/100, batch:   638/  792, ite: 64927] train loss: 4.443749, tar: 0.454617 
l0: 0.484247, l1: 0.490480, l2: 0.495791, l3: 0.516573, l4: 0.529802, l5: 0.611728, l6: 0.882547

[epoch:  69/100, batch:   640/  792, ite: 64928] train loss: 4.444217, tar: 0.454649 
l0: 0.362989, l1: 0.363399, l2: 0.362558, l3: 0.373433, l4: 0.407317, l5: 0.508353, l6: 0.635572

[epoch:  69/100, batch:   642/  792, ite: 64929] train loss: 4.443411, tar: 0.454551 
l0: 0.399430, l1: 0.410381, l2: 0.412056, l3: 0.406538, l4: 0.417623, l5: 0.597092, l6: 0.651677

[epoch:  69/100, batch:   644/  792, ite: 64930] train loss: 4.442935, tar: 0.454491 
l0: 0.463894, l1: 0.456388, l2: 0.454839, l3: 0.455400, l4: 0.481690, l5: 0.578865, l6: 0.726446

[epoch:  69/100, batch:   646/  792, ite: 64931] train loss: 4.442987, tar: 0.454501 
l0: 0.282595, l1: 0.282694, l2: 0.286229, l3: 0.290994, l4: 0.318303, l5: 0.425631, l6: 0.587091

[epoch:  69/100, batch:   648/  792, ite: 64932] train loss: 4.441534, tar: 0.45

[epoch:  69/100, batch:   728/  792, ite: 64972] train loss: 4.437992, tar: 0.453827 
l0: 0.602222, l1: 0.614431, l2: 0.612143, l3: 0.613604, l4: 0.649396, l5: 0.878340, l6: 1.107935

[epoch:  69/100, batch:   730/  792, ite: 64973] train loss: 4.439793, tar: 0.453980 
l0: 0.739014, l1: 0.736616, l2: 0.738000, l3: 0.753741, l4: 0.811554, l5: 0.825686, l6: 1.328827

[epoch:  69/100, batch:   732/  792, ite: 64974] train loss: 4.442565, tar: 0.454272 
l0: 0.402832, l1: 0.405909, l2: 0.405297, l3: 0.407757, l4: 0.432993, l5: 0.567168, l6: 0.688680

[epoch:  69/100, batch:   734/  792, ite: 64975] train loss: 4.442100, tar: 0.454220 
l0: 0.324090, l1: 0.323556, l2: 0.323495, l3: 0.336787, l4: 0.358763, l5: 0.473450, l6: 0.563438

[epoch:  69/100, batch:   736/  792, ite: 64976] train loss: 4.440992, tar: 0.454086 
l0: 0.545467, l1: 0.544435, l2: 0.539533, l3: 0.542013, l4: 0.544577, l5: 0.619913, l6: 0.657957

[epoch:  69/100, batch:   738/  792, ite: 64977] train loss: 4.441228, tar: 0.45

l0: 0.425344, l1: 0.424778, l2: 0.429612, l3: 0.430640, l4: 0.473268, l5: 0.591862, l6: 0.743912

[epoch:  70/100, batch:    26/  792, ite: 65017] train loss: 4.422555, tar: 0.451962 
l0: 0.181141, l1: 0.190301, l2: 0.191446, l3: 0.200173, l4: 0.239579, l5: 0.417266, l6: 0.485114

[epoch:  70/100, batch:    28/  792, ite: 65018] train loss: 4.420579, tar: 0.451696 
l0: 0.500091, l1: 0.498616, l2: 0.495039, l3: 0.486349, l4: 0.469768, l5: 0.513772, l6: 0.644661

[epoch:  70/100, batch:    30/  792, ite: 65019] train loss: 4.420590, tar: 0.451743 
l0: 0.759170, l1: 0.761007, l2: 0.756639, l3: 0.751681, l4: 0.766394, l5: 0.807581, l6: 0.879101

[epoch:  70/100, batch:    32/  792, ite: 65020] train loss: 4.422579, tar: 0.452044 
l0: 0.326284, l1: 0.331917, l2: 0.329307, l3: 0.329741, l4: 0.339781, l5: 0.472023, l6: 0.702483

[epoch:  70/100, batch:    34/  792, ite: 65021] train loss: 4.421551, tar: 0.451921 
l0: 0.604254, l1: 0.607209, l2: 0.606365, l3: 0.608674, l4: 0.658589, l5: 0.7277

[epoch:  70/100, batch:   114/  792, ite: 65061] train loss: 4.421557, tar: 0.452093 
l0: 0.528330, l1: 0.539232, l2: 0.533690, l3: 0.536094, l4: 0.577022, l5: 0.707767, l6: 0.910279

[epoch:  70/100, batch:   116/  792, ite: 65062] train loss: 4.422344, tar: 0.452165 
l0: 0.634007, l1: 0.645262, l2: 0.644680, l3: 0.639003, l4: 0.641391, l5: 0.636808, l6: 0.671829

[epoch:  70/100, batch:   118/  792, ite: 65063] train loss: 4.423129, tar: 0.452336 
l0: 0.286862, l1: 0.289311, l2: 0.289995, l3: 0.299745, l4: 0.312618, l5: 0.480363, l6: 0.613441

[epoch:  70/100, batch:   120/  792, ite: 65064] train loss: 4.422050, tar: 0.452180 
l0: 0.340644, l1: 0.337767, l2: 0.338409, l3: 0.340551, l4: 0.338186, l5: 0.387091, l6: 0.488950

[epoch:  70/100, batch:   122/  792, ite: 65065] train loss: 4.420809, tar: 0.452075 
l0: 0.277675, l1: 0.289526, l2: 0.286679, l3: 0.287189, l4: 0.308462, l5: 0.418536, l6: 0.625939

[epoch:  70/100, batch:   124/  792, ite: 65066] train loss: 4.419548, tar: 0.45

[epoch:  70/100, batch:   204/  792, ite: 65106] train loss: 4.430523, tar: 0.453403 
l0: 0.433735, l1: 0.432430, l2: 0.430318, l3: 0.433320, l4: 0.444086, l5: 0.465050, l6: 0.685699

[epoch:  70/100, batch:   206/  792, ite: 65107] train loss: 4.430198, tar: 0.453385 
l0: 0.485058, l1: 0.490192, l2: 0.490758, l3: 0.488801, l4: 0.517510, l5: 0.707163, l6: 0.860683

[epoch:  70/100, batch:   208/  792, ite: 65108] train loss: 4.430787, tar: 0.453414 
l0: 0.259159, l1: 0.257752, l2: 0.253865, l3: 0.257563, l4: 0.283510, l5: 0.345935, l6: 0.508139

[epoch:  70/100, batch:   210/  792, ite: 65109] train loss: 4.429243, tar: 0.453239 
l0: 0.310400, l1: 0.308346, l2: 0.308934, l3: 0.312286, l4: 0.347550, l5: 0.420607, l6: 0.508222

[epoch:  70/100, batch:   212/  792, ite: 65110] train loss: 4.428095, tar: 0.453110 
l0: 0.366282, l1: 0.377408, l2: 0.379710, l3: 0.378857, l4: 0.422883, l5: 0.529119, l6: 0.750783

[epoch:  70/100, batch:   214/  792, ite: 65111] train loss: 4.427815, tar: 0.45

[epoch:  70/100, batch:   294/  792, ite: 65151] train loss: 4.422164, tar: 0.452694 
l0: 0.504845, l1: 0.505834, l2: 0.506574, l3: 0.510683, l4: 0.535195, l5: 0.600606, l6: 0.862763

[epoch:  70/100, batch:   296/  792, ite: 65152] train loss: 4.422523, tar: 0.452739 
l0: 0.731699, l1: 0.737508, l2: 0.740440, l3: 0.737916, l4: 0.747368, l5: 0.764427, l6: 0.806599

[epoch:  70/100, batch:   298/  792, ite: 65153] train loss: 4.423960, tar: 0.452981 
l0: 0.386458, l1: 0.380324, l2: 0.383069, l3: 0.390060, l4: 0.409062, l5: 0.482633, l6: 0.621921

[epoch:  70/100, batch:   300/  792, ite: 65154] train loss: 4.423388, tar: 0.452923 
l0: 0.280231, l1: 0.292024, l2: 0.295182, l3: 0.307363, l4: 0.340462, l5: 0.504166, l6: 0.701667

[epoch:  70/100, batch:   302/  792, ite: 65155] train loss: 4.422529, tar: 0.452774 
l0: 0.325873, l1: 0.331061, l2: 0.330878, l3: 0.331597, l4: 0.325581, l5: 0.445018, l6: 0.631511

[epoch:  70/100, batch:   304/  792, ite: 65156] train loss: 4.421582, tar: 0.45

[epoch:  70/100, batch:   384/  792, ite: 65196] train loss: 4.427318, tar: 0.453763 
l0: 0.312055, l1: 0.314924, l2: 0.316439, l3: 0.317902, l4: 0.330775, l5: 0.428615, l6: 0.506695

[epoch:  70/100, batch:   386/  792, ite: 65197] train loss: 4.426170, tar: 0.453645 
l0: 0.811959, l1: 0.802937, l2: 0.801996, l3: 0.802049, l4: 0.840726, l5: 0.889701, l6: 1.002956

[epoch:  70/100, batch:   388/  792, ite: 65198] train loss: 4.428407, tar: 0.453944 
l0: 0.282166, l1: 0.292937, l2: 0.292718, l3: 0.303522, l4: 0.320028, l5: 0.477740, l6: 0.656204

[epoch:  70/100, batch:   390/  792, ite: 65199] train loss: 4.427464, tar: 0.453801 
l0: 0.343862, l1: 0.345513, l2: 0.347765, l3: 0.350389, l4: 0.380639, l5: 0.538183, l6: 0.696889

[epoch:  70/100, batch:   392/  792, ite: 65200] train loss: 4.426914, tar: 0.453709 
l0: 0.460819, l1: 0.463836, l2: 0.459990, l3: 0.455019, l4: 0.480432, l5: 0.547301, l6: 0.733062

[epoch:  70/100, batch:   394/  792, ite: 65201] train loss: 4.426933, tar: 0.45

[epoch:  70/100, batch:   474/  792, ite: 65241] train loss: 4.425015, tar: 0.453454 
l0: 0.394925, l1: 0.391388, l2: 0.386018, l3: 0.387752, l4: 0.403911, l5: 0.495234, l6: 0.659586

[epoch:  70/100, batch:   476/  792, ite: 65242] train loss: 4.424496, tar: 0.453407 
l0: 0.330431, l1: 0.334535, l2: 0.331592, l3: 0.325749, l4: 0.343931, l5: 0.510728, l6: 0.742753

[epoch:  70/100, batch:   478/  792, ite: 65243] train loss: 4.423900, tar: 0.453308 
l0: 0.303057, l1: 0.304321, l2: 0.306405, l3: 0.316416, l4: 0.374235, l5: 0.523148, l6: 0.737575

[epoch:  70/100, batch:   480/  792, ite: 65244] train loss: 4.423181, tar: 0.453187 
l0: 0.503816, l1: 0.504707, l2: 0.507173, l3: 0.504961, l4: 0.517232, l5: 0.526969, l6: 0.635161

[epoch:  70/100, batch:   482/  792, ite: 65245] train loss: 4.423156, tar: 0.453228 
l0: 0.212500, l1: 0.213466, l2: 0.215813, l3: 0.221365, l4: 0.237500, l5: 0.290919, l6: 0.392764

[epoch:  70/100, batch:   484/  792, ite: 65246] train loss: 4.421394, tar: 0.45

[epoch:  70/100, batch:   564/  792, ite: 65286] train loss: 4.427243, tar: 0.453515 
l0: 0.252928, l1: 0.258435, l2: 0.259480, l3: 0.277677, l4: 0.299998, l5: 0.389997, l6: 0.513165

[epoch:  70/100, batch:   566/  792, ite: 65287] train loss: 4.425992, tar: 0.453359 
l0: 0.347719, l1: 0.349585, l2: 0.351670, l3: 0.360083, l4: 0.375264, l5: 0.462398, l6: 0.622622

[epoch:  70/100, batch:   568/  792, ite: 65288] train loss: 4.425305, tar: 0.453277 
l0: 0.506666, l1: 0.509043, l2: 0.506062, l3: 0.520468, l4: 0.543405, l5: 0.668096, l6: 0.991903

[epoch:  70/100, batch:   570/  792, ite: 65289] train loss: 4.426069, tar: 0.453319 
l0: 0.529020, l1: 0.537123, l2: 0.536816, l3: 0.542539, l4: 0.550979, l5: 0.656375, l6: 0.843987

[epoch:  70/100, batch:   572/  792, ite: 65290] train loss: 4.426615, tar: 0.453377 
l0: 0.523911, l1: 0.521525, l2: 0.520474, l3: 0.516954, l4: 0.542241, l5: 0.627109, l6: 0.828762

[epoch:  70/100, batch:   574/  792, ite: 65291] train loss: 4.426955, tar: 0.45

[epoch:  70/100, batch:   654/  792, ite: 65331] train loss: 4.422336, tar: 0.452600 
l0: 0.503897, l1: 0.499309, l2: 0.501724, l3: 0.512718, l4: 0.503818, l5: 0.502013, l6: 0.606986

[epoch:  70/100, batch:   656/  792, ite: 65332] train loss: 4.422165, tar: 0.452638 
l0: 0.716045, l1: 0.732455, l2: 0.732855, l3: 0.732980, l4: 0.754403, l5: 0.755036, l6: 0.859974

[epoch:  70/100, batch:   658/  792, ite: 65333] train loss: 4.423516, tar: 0.452836 
l0: 0.704531, l1: 0.705880, l2: 0.701618, l3: 0.708321, l4: 0.750659, l5: 0.855173, l6: 0.972632

[epoch:  70/100, batch:   660/  792, ite: 65334] train loss: 4.425034, tar: 0.453024 
l0: 0.216107, l1: 0.218148, l2: 0.214643, l3: 0.219287, l4: 0.247674, l5: 0.401583, l6: 0.557981

[epoch:  70/100, batch:   662/  792, ite: 65335] train loss: 4.423746, tar: 0.452847 
l0: 0.648464, l1: 0.655938, l2: 0.656810, l3: 0.663533, l4: 0.674139, l5: 0.795834, l6: 0.896254

[epoch:  70/100, batch:   664/  792, ite: 65336] train loss: 4.424898, tar: 0.45

[epoch:  70/100, batch:   744/  792, ite: 65376] train loss: 4.418227, tar: 0.451984 
l0: 0.356314, l1: 0.355734, l2: 0.358305, l3: 0.362822, l4: 0.381176, l5: 0.440823, l6: 0.575487

[epoch:  70/100, batch:   746/  792, ite: 65377] train loss: 4.417549, tar: 0.451914 
l0: 0.259452, l1: 0.264335, l2: 0.265902, l3: 0.271651, l4: 0.316621, l5: 0.405121, l6: 0.471865

[epoch:  70/100, batch:   748/  792, ite: 65378] train loss: 4.416360, tar: 0.451774 
l0: 0.246031, l1: 0.253274, l2: 0.259351, l3: 0.270517, l4: 0.280214, l5: 0.400385, l6: 0.472084

[epoch:  70/100, batch:   750/  792, ite: 65379] train loss: 4.415097, tar: 0.451625 
l0: 0.387483, l1: 0.390675, l2: 0.391836, l3: 0.404393, l4: 0.426038, l5: 0.465883, l6: 0.642105

[epoch:  70/100, batch:   752/  792, ite: 65380] train loss: 4.414637, tar: 0.451579 
l0: 0.313305, l1: 0.316317, l2: 0.314520, l3: 0.320520, l4: 0.355117, l5: 0.457933, l6: 0.555753

[epoch:  70/100, batch:   754/  792, ite: 65381] train loss: 4.413762, tar: 0.45

l0: 0.416081, l1: 0.424437, l2: 0.427295, l3: 0.431157, l4: 0.472898, l5: 0.546360, l6: 0.598315

[epoch:  71/100, batch:    42/  792, ite: 65421] train loss: 4.421886, tar: 0.452408 
l0: 0.201400, l1: 0.202177, l2: 0.204222, l3: 0.215363, l4: 0.240365, l5: 0.348550, l6: 0.461259

[epoch:  71/100, batch:    44/  792, ite: 65422] train loss: 4.420499, tar: 0.452232 
l0: 0.560207, l1: 0.564426, l2: 0.565173, l3: 0.564814, l4: 0.571374, l5: 0.578429, l6: 0.734565

[epoch:  71/100, batch:    46/  792, ite: 65423] train loss: 4.420852, tar: 0.452308 
l0: 0.449763, l1: 0.453026, l2: 0.453626, l3: 0.444175, l4: 0.442149, l5: 0.548739, l6: 0.643898

[epoch:  71/100, batch:    48/  792, ite: 65424] train loss: 4.420701, tar: 0.452306 
l0: 0.598409, l1: 0.606415, l2: 0.604488, l3: 0.604698, l4: 0.603997, l5: 0.651848, l6: 0.793827

[epoch:  71/100, batch:    50/  792, ite: 65425] train loss: 4.421327, tar: 0.452408 
l0: 0.272775, l1: 0.269485, l2: 0.269308, l3: 0.274592, l4: 0.287005, l5: 0.3581

[epoch:  71/100, batch:   130/  792, ite: 65465] train loss: 4.416608, tar: 0.452046 
l0: 0.271460, l1: 0.271980, l2: 0.272083, l3: 0.271508, l4: 0.310210, l5: 0.479597, l6: 0.615257

[epoch:  71/100, batch:   132/  792, ite: 65466] train loss: 4.415732, tar: 0.451923 
l0: 0.827586, l1: 0.827837, l2: 0.826624, l3: 0.828329, l4: 0.854513, l5: 0.959696, l6: 1.233355

[epoch:  71/100, batch:   134/  792, ite: 65467] train loss: 4.417967, tar: 0.452179 
l0: 0.447910, l1: 0.441913, l2: 0.440660, l3: 0.448057, l4: 0.449097, l5: 0.509572, l6: 0.624133

[epoch:  71/100, batch:   136/  792, ite: 65468] train loss: 4.417736, tar: 0.452176 
l0: 0.506941, l1: 0.503121, l2: 0.502860, l3: 0.514601, l4: 0.536754, l5: 0.683824, l6: 0.868570

[epoch:  71/100, batch:   138/  792, ite: 65469] train loss: 4.418155, tar: 0.452213 
l0: 0.739970, l1: 0.743459, l2: 0.740608, l3: 0.738566, l4: 0.754340, l5: 0.800905, l6: 0.861564

[epoch:  71/100, batch:   140/  792, ite: 65470] train loss: 4.419421, tar: 0.45

[epoch:  71/100, batch:   220/  792, ite: 65510] train loss: 4.408862, tar: 0.451046 
l0: 0.800781, l1: 0.801276, l2: 0.797038, l3: 0.804216, l4: 0.851456, l5: 0.920071, l6: 1.124327

[epoch:  71/100, batch:   222/  792, ite: 65511] train loss: 4.410821, tar: 0.451277 
l0: 0.267511, l1: 0.269927, l2: 0.271709, l3: 0.285254, l4: 0.318889, l5: 0.379555, l6: 0.457190

[epoch:  71/100, batch:   224/  792, ite: 65512] train loss: 4.409729, tar: 0.451156 
l0: 0.659331, l1: 0.664660, l2: 0.665303, l3: 0.663358, l4: 0.738221, l5: 0.784090, l6: 0.987048

[epoch:  71/100, batch:   226/  792, ite: 65513] train loss: 4.410947, tar: 0.451293 
l0: 0.417820, l1: 0.414840, l2: 0.416048, l3: 0.412588, l4: 0.423276, l5: 0.524552, l6: 0.728214

[epoch:  71/100, batch:   228/  792, ite: 65514] train loss: 4.410804, tar: 0.451271 
l0: 0.369022, l1: 0.372941, l2: 0.375396, l3: 0.375286, l4: 0.403335, l5: 0.517239, l6: 0.727141

[epoch:  71/100, batch:   230/  792, ite: 65515] train loss: 4.410468, tar: 0.45

[epoch:  71/100, batch:   310/  792, ite: 65555] train loss: 4.402367, tar: 0.450322 
l0: 0.788979, l1: 0.803483, l2: 0.803338, l3: 0.808163, l4: 0.840402, l5: 0.867036, l6: 0.851368

[epoch:  71/100, batch:   312/  792, ite: 65556] train loss: 4.403830, tar: 0.450539 
l0: 0.318709, l1: 0.321324, l2: 0.324563, l3: 0.324452, l4: 0.332394, l5: 0.353355, l6: 0.518918

[epoch:  71/100, batch:   314/  792, ite: 65557] train loss: 4.403007, tar: 0.450455 
l0: 0.323245, l1: 0.317733, l2: 0.314035, l3: 0.318277, l4: 0.356978, l5: 0.477384, l6: 0.549345

[epoch:  71/100, batch:   316/  792, ite: 65558] train loss: 4.402305, tar: 0.450373 
l0: 0.239412, l1: 0.243363, l2: 0.244091, l3: 0.254067, l4: 0.285914, l5: 0.368456, l6: 0.627686

[epoch:  71/100, batch:   318/  792, ite: 65559] train loss: 4.401366, tar: 0.450238 
l0: 0.547369, l1: 0.552019, l2: 0.548045, l3: 0.546809, l4: 0.584859, l5: 0.737994, l6: 0.766567

[epoch:  71/100, batch:   320/  792, ite: 65560] train loss: 4.401725, tar: 0.45

[epoch:  71/100, batch:   400/  792, ite: 65600] train loss: 4.397729, tar: 0.449899 
l0: 0.549420, l1: 0.547598, l2: 0.544526, l3: 0.538870, l4: 0.541319, l5: 0.574212, l6: 0.751364

[epoch:  71/100, batch:   402/  792, ite: 65601] train loss: 4.398017, tar: 0.449962 
l0: 0.439130, l1: 0.445470, l2: 0.446708, l3: 0.450651, l4: 0.477046, l5: 0.618754, l6: 0.763319

[epoch:  71/100, batch:   404/  792, ite: 65602] train loss: 4.398050, tar: 0.449955 
l0: 0.337262, l1: 0.341929, l2: 0.341694, l3: 0.352512, l4: 0.372549, l5: 0.492935, l6: 0.600981

[epoch:  71/100, batch:   406/  792, ite: 65603] train loss: 4.397595, tar: 0.449884 
l0: 1.024892, l1: 1.024152, l2: 1.024840, l3: 1.031841, l4: 1.070624, l5: 1.178919, l6: 1.355779

[epoch:  71/100, batch:   408/  792, ite: 65604] train loss: 4.400524, tar: 0.450243 
l0: 0.235068, l1: 0.243070, l2: 0.243311, l3: 0.253312, l4: 0.279279, l5: 0.390870, l6: 0.552987

[epoch:  71/100, batch:   410/  792, ite: 65605] train loss: 4.399538, tar: 0.45

[epoch:  71/100, batch:   490/  792, ite: 65645] train loss: 4.398572, tar: 0.449869 
l0: 0.293395, l1: 0.298778, l2: 0.300757, l3: 0.309508, l4: 0.347873, l5: 0.540526, l6: 0.822203

[epoch:  71/100, batch:   492/  792, ite: 65646] train loss: 4.398181, tar: 0.449774 
l0: 0.269651, l1: 0.268532, l2: 0.272528, l3: 0.272870, l4: 0.289219, l5: 0.327003, l6: 0.479558

[epoch:  71/100, batch:   494/  792, ite: 65647] train loss: 4.397143, tar: 0.449665 
l0: 0.426344, l1: 0.430716, l2: 0.430998, l3: 0.432362, l4: 0.452110, l5: 0.627496, l6: 0.847343

[epoch:  71/100, batch:   496/  792, ite: 65648] train loss: 4.397233, tar: 0.449651 
l0: 0.357551, l1: 0.359944, l2: 0.359789, l3: 0.358855, l4: 0.394004, l5: 0.473199, l6: 0.642179

[epoch:  71/100, batch:   498/  792, ite: 65649] train loss: 4.396737, tar: 0.449595 
l0: 0.262578, l1: 0.267369, l2: 0.266660, l3: 0.271900, l4: 0.312769, l5: 0.433637, l6: 0.527206

[epoch:  71/100, batch:   500/  792, ite: 65650] train loss: 4.395829, tar: 0.44

[epoch:  71/100, batch:   580/  792, ite: 65690] train loss: 4.395018, tar: 0.449399 
l0: 0.639437, l1: 0.632307, l2: 0.631029, l3: 0.630793, l4: 0.647883, l5: 0.680542, l6: 0.860107

[epoch:  71/100, batch:   582/  792, ite: 65691] train loss: 4.395776, tar: 0.449511 
l0: 0.432844, l1: 0.430309, l2: 0.428695, l3: 0.429183, l4: 0.478377, l5: 0.604988, l6: 0.680735

[epoch:  71/100, batch:   584/  792, ite: 65692] train loss: 4.395645, tar: 0.449501 
l0: 0.440896, l1: 0.443969, l2: 0.443759, l3: 0.434623, l4: 0.420079, l5: 0.533259, l6: 0.602865

[epoch:  71/100, batch:   586/  792, ite: 65693] train loss: 4.395398, tar: 0.449496 
l0: 0.318528, l1: 0.321796, l2: 0.320650, l3: 0.333038, l4: 0.345638, l5: 0.453467, l6: 0.597228

[epoch:  71/100, batch:   588/  792, ite: 65694] train loss: 4.394767, tar: 0.449419 
l0: 0.412378, l1: 0.410620, l2: 0.411175, l3: 0.417891, l4: 0.437034, l5: 0.456000, l6: 0.525253

[epoch:  71/100, batch:   590/  792, ite: 65695] train loss: 4.394309, tar: 0.44

[epoch:  71/100, batch:   670/  792, ite: 65735] train loss: 4.399485, tar: 0.449928 
l0: 0.398523, l1: 0.396035, l2: 0.398964, l3: 0.400448, l4: 0.410043, l5: 0.470989, l6: 0.619892

[epoch:  71/100, batch:   672/  792, ite: 65736] train loss: 4.399111, tar: 0.449898 
l0: 0.410572, l1: 0.414081, l2: 0.417379, l3: 0.418227, l4: 0.440373, l5: 0.476644, l6: 0.651816

[epoch:  71/100, batch:   674/  792, ite: 65737] train loss: 4.398857, tar: 0.449876 
l0: 0.389720, l1: 0.390909, l2: 0.385210, l3: 0.387754, l4: 0.412576, l5: 0.492154, l6: 0.676998

[epoch:  71/100, batch:   676/  792, ite: 65738] train loss: 4.398542, tar: 0.449841 
l0: 0.402949, l1: 0.405136, l2: 0.404510, l3: 0.400676, l4: 0.431105, l5: 0.559580, l6: 0.739393

[epoch:  71/100, batch:   678/  792, ite: 65739] train loss: 4.398356, tar: 0.449814 
l0: 0.457441, l1: 0.462902, l2: 0.462074, l3: 0.460330, l4: 0.474090, l5: 0.548076, l6: 0.778919

[epoch:  71/100, batch:   680/  792, ite: 65740] train loss: 4.398439, tar: 0.44

[epoch:  71/100, batch:   760/  792, ite: 65780] train loss: 4.403100, tar: 0.450506 
l0: 0.595922, l1: 0.610827, l2: 0.609341, l3: 0.606847, l4: 0.629580, l5: 0.701869, l6: 0.813502

[epoch:  71/100, batch:   762/  792, ite: 65781] train loss: 4.403695, tar: 0.450588 
l0: 0.403095, l1: 0.400970, l2: 0.399422, l3: 0.399831, l4: 0.416715, l5: 0.489220, l6: 0.596936

[epoch:  71/100, batch:   764/  792, ite: 65782] train loss: 4.403399, tar: 0.450561 
l0: 0.410644, l1: 0.411556, l2: 0.412854, l3: 0.422312, l4: 0.427405, l5: 0.468899, l6: 0.648802

[epoch:  71/100, batch:   766/  792, ite: 65783] train loss: 4.403109, tar: 0.450539 
l0: 0.970355, l1: 0.993154, l2: 0.988988, l3: 0.978963, l4: 1.026817, l5: 1.109501, l6: 1.171676

[epoch:  71/100, batch:   768/  792, ite: 65784] train loss: 4.405410, tar: 0.450830 
l0: 0.278173, l1: 0.278933, l2: 0.280247, l3: 0.283546, l4: 0.286005, l5: 0.382906, l6: 0.507921

[epoch:  71/100, batch:   770/  792, ite: 65785] train loss: 4.404530, tar: 0.45

l0: 0.619036, l1: 0.626477, l2: 0.626601, l3: 0.623398, l4: 0.639782, l5: 0.721889, l6: 0.802320

[epoch:  72/100, batch:    58/  792, ite: 65825] train loss: 4.406971, tar: 0.451127 
l0: 0.834975, l1: 0.841209, l2: 0.834160, l3: 0.844445, l4: 0.829406, l5: 0.841116, l6: 0.874425

[epoch:  72/100, batch:    60/  792, ite: 65826] train loss: 4.408345, tar: 0.451337 
l0: 0.314291, l1: 0.319480, l2: 0.319437, l3: 0.324968, l4: 0.347941, l5: 0.469991, l6: 0.736330

[epoch:  72/100, batch:    62/  792, ite: 65827] train loss: 4.407953, tar: 0.451262 
l0: 0.354955, l1: 0.353960, l2: 0.357682, l3: 0.364809, l4: 0.396113, l5: 0.441913, l6: 0.536116

[epoch:  72/100, batch:    64/  792, ite: 65828] train loss: 4.407397, tar: 0.451210 
l0: 0.293770, l1: 0.290129, l2: 0.289270, l3: 0.289537, l4: 0.306546, l5: 0.339100, l6: 0.539697

[epoch:  72/100, batch:    66/  792, ite: 65829] train loss: 4.406572, tar: 0.451124 
l0: 0.466953, l1: 0.468797, l2: 0.471178, l3: 0.472232, l4: 0.539119, l5: 0.6676

[epoch:  72/100, batch:   146/  792, ite: 65869] train loss: 4.403859, tar: 0.450787 
l0: 0.288936, l1: 0.290869, l2: 0.292401, l3: 0.299105, l4: 0.303047, l5: 0.468183, l6: 0.643471

[epoch:  72/100, batch:   148/  792, ite: 65870] train loss: 4.403327, tar: 0.450700 
l0: 0.558037, l1: 0.559138, l2: 0.557875, l3: 0.561428, l4: 0.571738, l5: 0.625121, l6: 0.753345

[epoch:  72/100, batch:   150/  792, ite: 65871] train loss: 4.403674, tar: 0.450758 
l0: 0.452018, l1: 0.458411, l2: 0.454672, l3: 0.450345, l4: 0.469146, l5: 0.550657, l6: 0.666158

[epoch:  72/100, batch:   152/  792, ite: 65872] train loss: 4.403615, tar: 0.450758 
l0: 0.608239, l1: 0.610027, l2: 0.614201, l3: 0.608544, l4: 0.636085, l5: 0.711955, l6: 1.051013

[epoch:  72/100, batch:   154/  792, ite: 65873] train loss: 4.404383, tar: 0.450842 
l0: 0.478832, l1: 0.474965, l2: 0.477774, l3: 0.476504, l4: 0.513616, l5: 0.684741, l6: 0.909247

[epoch:  72/100, batch:   156/  792, ite: 65874] train loss: 4.404685, tar: 0.45

[epoch:  72/100, batch:   236/  792, ite: 65914] train loss: 4.410300, tar: 0.451477 
l0: 0.525065, l1: 0.524565, l2: 0.521805, l3: 0.519646, l4: 0.534585, l5: 0.621953, l6: 0.911649

[epoch:  72/100, batch:   238/  792, ite: 65915] train loss: 4.410598, tar: 0.451516 
l0: 0.406469, l1: 0.411124, l2: 0.415883, l3: 0.413236, l4: 0.430450, l5: 0.596319, l6: 0.716055

[epoch:  72/100, batch:   240/  792, ite: 65916] train loss: 4.410492, tar: 0.451492 
l0: 0.430418, l1: 0.439230, l2: 0.440011, l3: 0.441064, l4: 0.451257, l5: 0.479426, l6: 0.591688

[epoch:  72/100, batch:   242/  792, ite: 65917] train loss: 4.410258, tar: 0.451481 
l0: 0.391930, l1: 0.404902, l2: 0.406101, l3: 0.415057, l4: 0.447401, l5: 0.613018, l6: 0.738050

[epoch:  72/100, batch:   244/  792, ite: 65918] train loss: 4.410154, tar: 0.451450 
l0: 0.337547, l1: 0.333462, l2: 0.333964, l3: 0.338969, l4: 0.365903, l5: 0.512459, l6: 0.687347

[epoch:  72/100, batch:   246/  792, ite: 65919] train loss: 4.409740, tar: 0.45

[epoch:  72/100, batch:   326/  792, ite: 65959] train loss: 4.406035, tar: 0.451028 
l0: 0.350054, l1: 0.346983, l2: 0.342902, l3: 0.347872, l4: 0.355253, l5: 0.443279, l6: 0.596372

[epoch:  72/100, batch:   328/  792, ite: 65960] train loss: 4.405507, tar: 0.450976 
l0: 0.434051, l1: 0.435145, l2: 0.437193, l3: 0.455245, l4: 0.485011, l5: 0.541706, l6: 0.607442

[epoch:  72/100, batch:   330/  792, ite: 65961] train loss: 4.405296, tar: 0.450967 
l0: 0.352150, l1: 0.352386, l2: 0.351823, l3: 0.354363, l4: 0.368178, l5: 0.459643, l6: 0.579366

[epoch:  72/100, batch:   332/  792, ite: 65962] train loss: 4.404789, tar: 0.450917 
l0: 0.239592, l1: 0.240031, l2: 0.238680, l3: 0.240517, l4: 0.272145, l5: 0.359159, l6: 0.416334

[epoch:  72/100, batch:   334/  792, ite: 65963] train loss: 4.403791, tar: 0.450809 
l0: 0.351252, l1: 0.354765, l2: 0.352752, l3: 0.360742, l4: 0.398195, l5: 0.472781, l6: 0.693265

[epoch:  72/100, batch:   336/  792, ite: 65964] train loss: 4.403496, tar: 0.45

[epoch:  72/100, batch:   416/  792, ite: 66004] train loss: 5.383932, tar: 0.570214 
l0: 0.455020, l1: 0.467778, l2: 0.469073, l3: 0.470290, l4: 0.493826, l5: 0.552978, l6: 0.664051

[epoch:  72/100, batch:   418/  792, ite: 66005] train loss: 5.175253, tar: 0.547175 
l0: 0.523499, l1: 0.526271, l2: 0.528339, l3: 0.535453, l4: 0.585025, l5: 0.672707, l6: 0.823774

[epoch:  72/100, batch:   420/  792, ite: 66006] train loss: 5.182596, tar: 0.543229 
l0: 0.480034, l1: 0.483058, l2: 0.483279, l3: 0.488816, l4: 0.498182, l5: 0.633794, l6: 0.781454

[epoch:  72/100, batch:   422/  792, ite: 66007] train loss: 5.117695, tar: 0.534201 
l0: 0.214670, l1: 0.211105, l2: 0.217828, l3: 0.224402, l4: 0.241649, l5: 0.335599, l6: 0.496781

[epoch:  72/100, batch:   424/  792, ite: 66008] train loss: 4.786846, tar: 0.494260 
l0: 0.274825, l1: 0.276487, l2: 0.275883, l3: 0.278575, l4: 0.299718, l5: 0.406148, l6: 0.654104

[epoch:  72/100, batch:   426/  792, ite: 66009] train loss: 4.601923, tar: 0.46

[epoch:  72/100, batch:   506/  792, ite: 66049] train loss: 4.374935, tar: 0.451089 
l0: 0.283705, l1: 0.280549, l2: 0.284787, l3: 0.291835, l4: 0.305122, l5: 0.421537, l6: 0.702440

[epoch:  72/100, batch:   508/  792, ite: 66050] train loss: 4.351789, tar: 0.447741 
l0: 0.306262, l1: 0.308919, l2: 0.309960, l3: 0.314240, l4: 0.359804, l5: 0.519297, l6: 0.776196

[epoch:  72/100, batch:   510/  792, ite: 66051] train loss: 4.339268, tar: 0.444967 
l0: 0.830067, l1: 0.846538, l2: 0.842245, l3: 0.834936, l4: 0.832604, l5: 0.833232, l6: 1.107139

[epoch:  72/100, batch:   512/  792, ite: 66052] train loss: 4.397641, tar: 0.452373 
l0: 0.296875, l1: 0.299027, l2: 0.298435, l3: 0.302182, l4: 0.332148, l5: 0.454358, l6: 0.525830

[epoch:  72/100, batch:   514/  792, ite: 66053] train loss: 4.373770, tar: 0.449439 
l0: 0.633844, l1: 0.645883, l2: 0.646294, l3: 0.650107, l4: 0.690948, l5: 0.833410, l6: 1.129508

[epoch:  72/100, batch:   516/  792, ite: 66054] train loss: 4.411818, tar: 0.45

[epoch:  72/100, batch:   596/  792, ite: 66094] train loss: 4.648002, tar: 0.480158 
l0: 0.224014, l1: 0.229211, l2: 0.232236, l3: 0.244777, l4: 0.274245, l5: 0.411647, l6: 0.639256

[epoch:  72/100, batch:   598/  792, ite: 66095] train loss: 4.630006, tar: 0.477462 
l0: 0.398219, l1: 0.405845, l2: 0.404108, l3: 0.405937, l4: 0.438596, l5: 0.594795, l6: 0.873512

[epoch:  72/100, batch:   600/  792, ite: 66096] train loss: 4.628273, tar: 0.476636 
l0: 0.351737, l1: 0.354967, l2: 0.356116, l3: 0.361144, l4: 0.369585, l5: 0.380441, l6: 0.509706

[epoch:  72/100, batch:   602/  792, ite: 66097] train loss: 4.614538, tar: 0.475349 
l0: 0.277108, l1: 0.292916, l2: 0.296064, l3: 0.302552, l4: 0.364954, l5: 0.481229, l6: 0.772917

[epoch:  72/100, batch:   604/  792, ite: 66098] train loss: 4.605149, tar: 0.473326 
l0: 0.371752, l1: 0.368836, l2: 0.368756, l3: 0.366878, l4: 0.395984, l5: 0.535792, l6: 0.682578

[epoch:  72/100, batch:   606/  792, ite: 66099] train loss: 4.596743, tar: 0.47

[epoch:  72/100, batch:   686/  792, ite: 66139] train loss: 4.443568, tar: 0.453446 
l0: 0.344137, l1: 0.349807, l2: 0.348998, l3: 0.349100, l4: 0.376964, l5: 0.543697, l6: 0.652254

[epoch:  72/100, batch:   688/  792, ite: 66140] train loss: 4.437856, tar: 0.452665 
l0: 0.417657, l1: 0.430281, l2: 0.424653, l3: 0.412626, l4: 0.418929, l5: 0.474343, l6: 0.760980

[epoch:  72/100, batch:   690/  792, ite: 66141] train loss: 4.435370, tar: 0.452416 
l0: 0.248760, l1: 0.250338, l2: 0.248449, l3: 0.259844, l4: 0.290774, l5: 0.383463, l6: 0.519094

[epoch:  72/100, batch:   692/  792, ite: 66142] train loss: 4.423549, tar: 0.450982 
l0: 0.392348, l1: 0.394685, l2: 0.392523, l3: 0.385871, l4: 0.413799, l5: 0.508786, l6: 0.620503

[epoch:  72/100, batch:   694/  792, ite: 66143] train loss: 4.419247, tar: 0.450572 
l0: 0.227829, l1: 0.232353, l2: 0.231598, l3: 0.236451, l4: 0.254577, l5: 0.328454, l6: 0.445139

[epoch:  72/100, batch:   696/  792, ite: 66144] train loss: 4.405415, tar: 0.44

[epoch:  72/100, batch:   776/  792, ite: 66184] train loss: 4.390225, tar: 0.447472 
l0: 0.259179, l1: 0.260540, l2: 0.260782, l3: 0.269387, l4: 0.295419, l5: 0.372766, l6: 0.524105

[epoch:  72/100, batch:   778/  792, ite: 66185] train loss: 4.381798, tar: 0.446455 
l0: 0.291329, l1: 0.289576, l2: 0.292402, l3: 0.296960, l4: 0.319382, l5: 0.395638, l6: 0.548952

[epoch:  72/100, batch:   780/  792, ite: 66186] train loss: 4.374700, tar: 0.445621 
l0: 0.420219, l1: 0.419155, l2: 0.420093, l3: 0.429717, l4: 0.448533, l5: 0.540902, l6: 0.639479

[epoch:  72/100, batch:   782/  792, ite: 66187] train loss: 4.372741, tar: 0.445485 
l0: 0.427179, l1: 0.425412, l2: 0.425382, l3: 0.424103, l4: 0.453233, l5: 0.554770, l6: 0.732105

[epoch:  72/100, batch:   784/  792, ite: 66188] train loss: 4.371727, tar: 0.445387 
l0: 0.183985, l1: 0.181282, l2: 0.181718, l3: 0.185502, l4: 0.202370, l5: 0.382083, l6: 0.458507

[epoch:  72/100, batch:   786/  792, ite: 66189] train loss: 4.360579, tar: 0.44

l0: 0.599729, l1: 0.609610, l2: 0.606926, l3: 0.606887, l4: 0.612667, l5: 0.712456, l6: 0.813449

[epoch:  73/100, batch:    74/  792, ite: 66229] train loss: 4.324138, tar: 0.439292 
l0: 0.240899, l1: 0.243549, l2: 0.246815, l3: 0.249766, l4: 0.286759, l5: 0.473836, l6: 0.614496

[epoch:  73/100, batch:    76/  792, ite: 66230] train loss: 4.318521, tar: 0.438429 
l0: 0.334567, l1: 0.333610, l2: 0.331838, l3: 0.330913, l4: 0.361415, l5: 0.496105, l6: 0.565219

[epoch:  73/100, batch:    78/  792, ite: 66231] train loss: 4.314446, tar: 0.437980 
l0: 0.404893, l1: 0.407017, l2: 0.409353, l3: 0.408623, l4: 0.420432, l5: 0.513212, l6: 0.589176

[epoch:  73/100, batch:    80/  792, ite: 66232] train loss: 4.312214, tar: 0.437837 
l0: 0.311480, l1: 0.312695, l2: 0.313070, l3: 0.318952, l4: 0.346295, l5: 0.467426, l6: 0.566304

[epoch:  73/100, batch:    82/  792, ite: 66233] train loss: 4.307897, tar: 0.437295 
l0: 0.308336, l1: 0.311545, l2: 0.308775, l3: 0.315844, l4: 0.354597, l5: 0.4254

[epoch:  73/100, batch:   162/  792, ite: 66273] train loss: 4.321412, tar: 0.439006 
l0: 0.391960, l1: 0.392728, l2: 0.394854, l3: 0.394418, l4: 0.424212, l5: 0.514881, l6: 0.633245

[epoch:  73/100, batch:   164/  792, ite: 66274] train loss: 4.319665, tar: 0.438834 
l0: 0.551137, l1: 0.565544, l2: 0.560265, l3: 0.549831, l4: 0.569631, l5: 0.649876, l6: 0.719800

[epoch:  73/100, batch:   166/  792, ite: 66275] train loss: 4.322006, tar: 0.439242 
l0: 0.612565, l1: 0.611265, l2: 0.609696, l3: 0.617533, l4: 0.642957, l5: 0.711232, l6: 0.960385

[epoch:  73/100, batch:   168/  792, ite: 66276] train loss: 4.327617, tar: 0.439870 
l0: 0.712616, l1: 0.730369, l2: 0.728449, l3: 0.725893, l4: 0.725795, l5: 0.735011, l6: 0.810560

[epoch:  73/100, batch:   170/  792, ite: 66277] train loss: 4.333791, tar: 0.440855 
l0: 0.690807, l1: 0.702438, l2: 0.699174, l3: 0.699020, l4: 0.725242, l5: 0.796674, l6: 1.039976

[epoch:  73/100, batch:   172/  792, ite: 66278] train loss: 4.341323, tar: 0.44

[epoch:  73/100, batch:   252/  792, ite: 66318] train loss: 4.317040, tar: 0.439593 
l0: 0.631447, l1: 0.635196, l2: 0.633073, l3: 0.637393, l4: 0.654177, l5: 0.740739, l6: 0.875332

[epoch:  73/100, batch:   254/  792, ite: 66319] train loss: 4.321529, tar: 0.440195 
l0: 0.347213, l1: 0.348633, l2: 0.351174, l3: 0.360776, l4: 0.398720, l5: 0.523510, l6: 0.605601

[epoch:  73/100, batch:   256/  792, ite: 66320] train loss: 4.319376, tar: 0.439904 
l0: 0.615230, l1: 0.609028, l2: 0.608676, l3: 0.603966, l4: 0.621293, l5: 0.633227, l6: 0.718530

[epoch:  73/100, batch:   258/  792, ite: 66321] train loss: 4.322238, tar: 0.440450 
l0: 0.417559, l1: 0.418652, l2: 0.419741, l3: 0.415863, l4: 0.456004, l5: 0.610399, l6: 0.827798

[epoch:  73/100, batch:   260/  792, ite: 66322] train loss: 4.322823, tar: 0.440379 
l0: 0.233889, l1: 0.234134, l2: 0.236914, l3: 0.245585, l4: 0.283169, l5: 0.432236, l6: 0.648964

[epoch:  73/100, batch:   262/  792, ite: 66323] train loss: 4.318770, tar: 0.43

[epoch:  73/100, batch:   342/  792, ite: 66363] train loss: 4.346675, tar: 0.443290 
l0: 0.642818, l1: 0.647761, l2: 0.649562, l3: 0.654746, l4: 0.652441, l5: 0.646295, l6: 0.828925

[epoch:  73/100, batch:   344/  792, ite: 66364] train loss: 4.349977, tar: 0.443838 
l0: 0.635063, l1: 0.642202, l2: 0.642072, l3: 0.645392, l4: 0.661680, l5: 0.726189, l6: 0.866715

[epoch:  73/100, batch:   346/  792, ite: 66365] train loss: 4.353602, tar: 0.444362 
l0: 0.491385, l1: 0.498518, l2: 0.493893, l3: 0.491454, l4: 0.505730, l5: 0.657235, l6: 0.798746

[epoch:  73/100, batch:   348/  792, ite: 66366] train loss: 4.355200, tar: 0.444491 
l0: 0.390600, l1: 0.386438, l2: 0.385017, l3: 0.382538, l4: 0.375776, l5: 0.432553, l6: 0.440157

[epoch:  73/100, batch:   350/  792, ite: 66367] train loss: 4.352489, tar: 0.444344 
l0: 0.405729, l1: 0.407253, l2: 0.407556, l3: 0.409725, l4: 0.430094, l5: 0.538456, l6: 0.636580

[epoch:  73/100, batch:   352/  792, ite: 66368] train loss: 4.351458, tar: 0.44

[epoch:  73/100, batch:   432/  792, ite: 66408] train loss: 4.330424, tar: 0.441836 
l0: 0.380416, l1: 0.382142, l2: 0.384693, l3: 0.388312, l4: 0.405270, l5: 0.552457, l6: 0.664044

[epoch:  73/100, batch:   434/  792, ite: 66409] train loss: 4.329376, tar: 0.441686 
l0: 0.374931, l1: 0.382047, l2: 0.378984, l3: 0.382192, l4: 0.407709, l5: 0.568184, l6: 0.827241

[epoch:  73/100, batch:   436/  792, ite: 66410] train loss: 4.328996, tar: 0.441523 
l0: 0.200003, l1: 0.201787, l2: 0.206265, l3: 0.202131, l4: 0.218626, l5: 0.300727, l6: 0.420092

[epoch:  73/100, batch:   438/  792, ite: 66411] train loss: 4.323959, tar: 0.440936 
l0: 0.320584, l1: 0.318335, l2: 0.317266, l3: 0.320868, l4: 0.333795, l5: 0.346085, l6: 0.567604

[epoch:  73/100, batch:   440/  792, ite: 66412] train loss: 4.321121, tar: 0.440644 
l0: 0.340332, l1: 0.337823, l2: 0.340516, l3: 0.348948, l4: 0.381117, l5: 0.481346, l6: 0.640321

[epoch:  73/100, batch:   442/  792, ite: 66413] train loss: 4.319194, tar: 0.44

[epoch:  73/100, batch:   522/  792, ite: 66453] train loss: 4.296983, tar: 0.437457 
l0: 0.542711, l1: 0.551036, l2: 0.551110, l3: 0.554785, l4: 0.602688, l5: 0.670398, l6: 1.335045

[epoch:  73/100, batch:   524/  792, ite: 66454] train loss: 4.300735, tar: 0.437689 
l0: 0.290063, l1: 0.292203, l2: 0.291345, l3: 0.291398, l4: 0.309259, l5: 0.384014, l6: 0.534954

[epoch:  73/100, batch:   526/  792, ite: 66455] train loss: 4.297961, tar: 0.437365 
l0: 0.382721, l1: 0.380110, l2: 0.382090, l3: 0.392249, l4: 0.391810, l5: 0.507619, l6: 0.654012

[epoch:  73/100, batch:   528/  792, ite: 66456] train loss: 4.297043, tar: 0.437245 
l0: 0.257726, l1: 0.261149, l2: 0.263206, l3: 0.267928, l4: 0.316896, l5: 0.500582, l6: 0.587677

[epoch:  73/100, batch:   530/  792, ite: 66457] train loss: 4.294524, tar: 0.436852 
l0: 0.375643, l1: 0.374332, l2: 0.374909, l3: 0.379324, l4: 0.409696, l5: 0.532327, l6: 0.787461

[epoch:  73/100, batch:   532/  792, ite: 66458] train loss: 4.293862, tar: 0.43

[epoch:  73/100, batch:   612/  792, ite: 66498] train loss: 4.291300, tar: 0.436310 
l0: 0.264155, l1: 0.268456, l2: 0.270776, l3: 0.283587, l4: 0.312342, l5: 0.411584, l6: 0.513908

[epoch:  73/100, batch:   614/  792, ite: 66499] train loss: 4.288555, tar: 0.435965 
l0: 0.380232, l1: 0.387247, l2: 0.391785, l3: 0.405467, l4: 0.433589, l5: 0.594701, l6: 0.756546

[epoch:  73/100, batch:   616/  792, ite: 66500] train loss: 4.288116, tar: 0.435853 
l0: 0.310195, l1: 0.315086, l2: 0.313455, l3: 0.315469, l4: 0.334286, l5: 0.440001, l6: 0.690487

[epoch:  73/100, batch:   618/  792, ite: 66501] train loss: 4.286386, tar: 0.435602 
l0: 0.516344, l1: 0.518736, l2: 0.516098, l3: 0.522461, l4: 0.570597, l5: 0.691429, l6: 0.807420

[epoch:  73/100, batch:   620/  792, ite: 66502] train loss: 4.287883, tar: 0.435763 
l0: 0.737207, l1: 0.742025, l2: 0.745780, l3: 0.753604, l4: 0.779031, l5: 0.808895, l6: 0.839596

[epoch:  73/100, batch:   622/  792, ite: 66503] train loss: 4.291980, tar: 0.43

[epoch:  73/100, batch:   702/  792, ite: 66543] train loss: 4.305748, tar: 0.438188 
l0: 0.485640, l1: 0.486926, l2: 0.484648, l3: 0.482538, l4: 0.490256, l5: 0.603272, l6: 0.856624

[epoch:  73/100, batch:   704/  792, ite: 66544] train loss: 4.306617, tar: 0.438275 
l0: 0.393014, l1: 0.400045, l2: 0.400145, l3: 0.386282, l4: 0.424142, l5: 0.466993, l6: 0.530035

[epoch:  73/100, batch:   706/  792, ite: 66545] train loss: 4.305343, tar: 0.438192 
l0: 0.176140, l1: 0.180730, l2: 0.181022, l3: 0.189743, l4: 0.238808, l5: 0.391806, l6: 0.500170

[epoch:  73/100, batch:   708/  792, ite: 66546] train loss: 4.302132, tar: 0.437712 
l0: 0.346708, l1: 0.355008, l2: 0.356588, l3: 0.367773, l4: 0.374567, l5: 0.538824, l6: 0.815975

[epoch:  73/100, batch:   710/  792, ite: 66547] train loss: 4.301730, tar: 0.437546 
l0: 0.354996, l1: 0.354061, l2: 0.357005, l3: 0.373350, l4: 0.423699, l5: 0.489477, l6: 0.648269

[epoch:  73/100, batch:   712/  792, ite: 66548] train loss: 4.300592, tar: 0.43

[epoch:  73/100, batch:   792/  792, ite: 66588] train loss: 4.306951, tar: 0.437707 
Starting epoch 74
Epoch 74 loading complete
l0: 0.434824, l1: 0.437615, l2: 0.433081, l3: 0.442310, l4: 0.552059, l5: 0.714240, l6: 0.886260

[epoch:  74/100, batch:     2/  792, ite: 66589] train loss: 4.307883, tar: 0.437702 
l0: 0.405220, l1: 0.403276, l2: 0.402538, l3: 0.410892, l4: 0.441253, l5: 0.589467, l6: 0.708789

[epoch:  74/100, batch:     4/  792, ite: 66590] train loss: 4.307759, tar: 0.437647 
l0: 0.413393, l1: 0.405597, l2: 0.401847, l3: 0.398232, l4: 0.413517, l5: 0.553683, l6: 0.740566

[epoch:  74/100, batch:     6/  792, ite: 66591] train loss: 4.307469, tar: 0.437606 
l0: 0.211755, l1: 0.219923, l2: 0.222050, l3: 0.231852, l4: 0.262988, l5: 0.373181, l6: 0.555591

[epoch:  74/100, batch:     8/  792, ite: 66592] train loss: 4.304688, tar: 0.437224 
l0: 0.472559, l1: 0.465488, l2: 0.463314, l3: 0.461349, l4: 0.471750, l5: 0.601137, l6: 0.866894

[epoch:  74/100, batch:    10/  792,

l0: 0.505886, l1: 0.492471, l2: 0.496175, l3: 0.500495, l4: 0.535690, l5: 0.624369, l6: 0.835999

[epoch:  74/100, batch:    90/  792, ite: 66633] train loss: 4.327883, tar: 0.439960 
l0: 0.434929, l1: 0.437106, l2: 0.437508, l3: 0.438105, l4: 0.445083, l5: 0.507759, l6: 0.600342

[epoch:  74/100, batch:    92/  792, ite: 66634] train loss: 4.327247, tar: 0.439952 
l0: 0.244242, l1: 0.248251, l2: 0.252071, l3: 0.267512, l4: 0.294668, l5: 0.389949, l6: 0.499596

[epoch:  74/100, batch:    94/  792, ite: 66635] train loss: 4.324880, tar: 0.439644 
l0: 0.737978, l1: 0.747179, l2: 0.747979, l3: 0.757354, l4: 0.794167, l5: 0.792359, l6: 0.875644

[epoch:  74/100, batch:    96/  792, ite: 66636] train loss: 4.327976, tar: 0.440113 
l0: 0.260114, l1: 0.261228, l2: 0.266319, l3: 0.278568, l4: 0.313350, l5: 0.370413, l6: 0.463612

[epoch:  74/100, batch:    98/  792, ite: 66637] train loss: 4.325433, tar: 0.439830 
l0: 0.587900, l1: 0.597949, l2: 0.596711, l3: 0.591246, l4: 0.620583, l5: 0.6453

[epoch:  74/100, batch:   178/  792, ite: 66677] train loss: 4.325430, tar: 0.439756 
l0: 0.363932, l1: 0.363598, l2: 0.362161, l3: 0.364895, l4: 0.369068, l5: 0.449860, l6: 0.568251

[epoch:  74/100, batch:   180/  792, ite: 66678] train loss: 4.324232, tar: 0.439645 
l0: 0.309107, l1: 0.316397, l2: 0.314913, l3: 0.319907, l4: 0.348180, l5: 0.456276, l6: 0.678523

[epoch:  74/100, batch:   182/  792, ite: 66679] train loss: 4.323109, tar: 0.439452 
l0: 0.396391, l1: 0.392213, l2: 0.391471, l3: 0.391417, l4: 0.414668, l5: 0.486715, l6: 0.598177

[epoch:  74/100, batch:   184/  792, ite: 66680] train loss: 4.322194, tar: 0.439389 
l0: 0.663536, l1: 0.679216, l2: 0.678446, l3: 0.687053, l4: 0.702631, l5: 0.703255, l6: 0.796016

[epoch:  74/100, batch:   186/  792, ite: 66681] train loss: 4.324261, tar: 0.439718 
l0: 0.577410, l1: 0.574511, l2: 0.572491, l3: 0.580043, l4: 0.614068, l5: 0.760246, l6: 0.902338

[epoch:  74/100, batch:   188/  792, ite: 66682] train loss: 4.326010, tar: 0.43

[epoch:  74/100, batch:   268/  792, ite: 66722] train loss: 4.324792, tar: 0.439686 
l0: 0.219840, l1: 0.222695, l2: 0.224848, l3: 0.227855, l4: 0.250080, l5: 0.326979, l6: 0.392046

[epoch:  74/100, batch:   270/  792, ite: 66723] train loss: 4.322100, tar: 0.439382 
l0: 0.283559, l1: 0.308533, l2: 0.306971, l3: 0.305652, l4: 0.347399, l5: 0.546362, l6: 0.729909

[epoch:  74/100, batch:   272/  792, ite: 66724] train loss: 4.321252, tar: 0.439167 
l0: 0.389829, l1: 0.391294, l2: 0.390752, l3: 0.401032, l4: 0.430242, l5: 0.529115, l6: 0.925521

[epoch:  74/100, batch:   274/  792, ite: 66725] train loss: 4.321316, tar: 0.439099 
l0: 0.287644, l1: 0.289880, l2: 0.290190, l3: 0.289427, l4: 0.317105, l5: 0.384864, l6: 0.418422

[epoch:  74/100, batch:   276/  792, ite: 66726] train loss: 4.319057, tar: 0.438890 
l0: 0.504232, l1: 0.507395, l2: 0.506711, l3: 0.507551, l4: 0.551847, l5: 0.623882, l6: 0.925322

[epoch:  74/100, batch:   278/  792, ite: 66727] train loss: 4.320063, tar: 0.43

[epoch:  74/100, batch:   358/  792, ite: 66767] train loss: 4.333363, tar: 0.441015 
l0: 0.393071, l1: 0.395686, l2: 0.398118, l3: 0.405464, l4: 0.411806, l5: 0.562632, l6: 0.705350

[epoch:  74/100, batch:   360/  792, ite: 66768] train loss: 4.332814, tar: 0.440952 
l0: 0.700388, l1: 0.695375, l2: 0.692903, l3: 0.704800, l4: 0.740998, l5: 0.782068, l6: 0.912867

[epoch:  74/100, batch:   362/  792, ite: 66769] train loss: 4.335359, tar: 0.441289 
l0: 0.473573, l1: 0.478092, l2: 0.479189, l3: 0.481174, l4: 0.521900, l5: 0.609215, l6: 0.774955

[epoch:  74/100, batch:   364/  792, ite: 66770] train loss: 4.335734, tar: 0.441331 
l0: 0.523236, l1: 0.522510, l2: 0.523801, l3: 0.535356, l4: 0.536196, l5: 0.623824, l6: 0.671411

[epoch:  74/100, batch:   366/  792, ite: 66771] train loss: 4.336153, tar: 0.441438 
l0: 0.446080, l1: 0.446969, l2: 0.448166, l3: 0.449339, l4: 0.454923, l5: 0.506910, l6: 0.532081

[epoch:  74/100, batch:   368/  792, ite: 66772] train loss: 4.335537, tar: 0.44

[epoch:  74/100, batch:   448/  792, ite: 66812] train loss: 4.309225, tar: 0.438111 
l0: 0.429617, l1: 0.425341, l2: 0.425773, l3: 0.438839, l4: 0.480598, l5: 0.567597, l6: 0.746062

[epoch:  74/100, batch:   450/  792, ite: 66813] train loss: 4.309128, tar: 0.438101 
l0: 0.314429, l1: 0.318510, l2: 0.319547, l3: 0.327561, l4: 0.360072, l5: 0.523980, l6: 0.550000

[epoch:  74/100, batch:   452/  792, ite: 66814] train loss: 4.307953, tar: 0.437949 
l0: 0.360292, l1: 0.360645, l2: 0.362403, l3: 0.373541, l4: 0.396231, l5: 0.524914, l6: 0.583922

[epoch:  74/100, batch:   454/  792, ite: 66815] train loss: 4.307151, tar: 0.437853 
l0: 0.354567, l1: 0.350300, l2: 0.352058, l3: 0.348758, l4: 0.339060, l5: 0.422867, l6: 0.547686

[epoch:  74/100, batch:   456/  792, ite: 66816] train loss: 4.305940, tar: 0.437751 
l0: 0.693852, l1: 0.705286, l2: 0.703314, l3: 0.710682, l4: 0.701622, l5: 0.740088, l6: 1.009889

[epoch:  74/100, batch:   458/  792, ite: 66817] train loss: 4.308407, tar: 0.43

[epoch:  74/100, batch:   538/  792, ite: 66857] train loss: 4.304844, tar: 0.437616 
l0: 0.451964, l1: 0.453403, l2: 0.453987, l3: 0.454815, l4: 0.470774, l5: 0.581739, l6: 0.869653

[epoch:  74/100, batch:   540/  792, ite: 66858] train loss: 4.305261, tar: 0.437632 
l0: 0.298011, l1: 0.300869, l2: 0.298358, l3: 0.301039, l4: 0.317555, l5: 0.474980, l6: 0.581500

[epoch:  74/100, batch:   542/  792, ite: 66859] train loss: 4.303993, tar: 0.437470 
l0: 0.520197, l1: 0.524185, l2: 0.527595, l3: 0.533055, l4: 0.562595, l5: 0.573419, l6: 0.655821

[epoch:  74/100, batch:   544/  792, ite: 66860] train loss: 4.304333, tar: 0.437566 
l0: 0.398425, l1: 0.402467, l2: 0.401465, l3: 0.413554, l4: 0.438954, l5: 0.565616, l6: 0.672631

[epoch:  74/100, batch:   546/  792, ite: 66861] train loss: 4.303885, tar: 0.437521 
l0: 0.506509, l1: 0.506326, l2: 0.505717, l3: 0.513471, l4: 0.538388, l5: 0.660366, l6: 0.712078

[epoch:  74/100, batch:   548/  792, ite: 66862] train loss: 4.304346, tar: 0.43

[epoch:  74/100, batch:   628/  792, ite: 66902] train loss: 4.296790, tar: 0.436676 
l0: 0.360504, l1: 0.359920, l2: 0.362291, l3: 0.375535, l4: 0.392951, l5: 0.486024, l6: 0.644987

[epoch:  74/100, batch:   630/  792, ite: 66903] train loss: 4.296141, tar: 0.436591 
l0: 0.311441, l1: 0.315756, l2: 0.314023, l3: 0.320479, l4: 0.339670, l5: 0.422457, l6: 0.557103

[epoch:  74/100, batch:   632/  792, ite: 66904] train loss: 4.294874, tar: 0.436453 
l0: 0.216230, l1: 0.217265, l2: 0.219020, l3: 0.216068, l4: 0.242479, l5: 0.376972, l6: 0.490366

[epoch:  74/100, batch:   634/  792, ite: 66905] train loss: 4.292923, tar: 0.436209 
l0: 0.656631, l1: 0.658712, l2: 0.661577, l3: 0.652581, l4: 0.698022, l5: 0.796345, l6: 1.047436

[epoch:  74/100, batch:   636/  792, ite: 66906] train loss: 4.295134, tar: 0.436453 
l0: 0.187319, l1: 0.195258, l2: 0.194195, l3: 0.199035, l4: 0.215408, l5: 0.324889, l6: 0.412648

[epoch:  74/100, batch:   638/  792, ite: 66907] train loss: 4.292826, tar: 0.43

[epoch:  74/100, batch:   718/  792, ite: 66947] train loss: 4.303367, tar: 0.437593 
l0: 1.139205, l1: 1.125597, l2: 1.124604, l3: 1.135498, l4: 1.156231, l5: 1.158027, l6: 1.197668

[epoch:  74/100, batch:   720/  792, ite: 66948] train loss: 4.308569, tar: 0.438333 
l0: 0.371213, l1: 0.369965, l2: 0.370208, l3: 0.369675, l4: 0.441117, l5: 0.566118, l6: 0.709945

[epoch:  74/100, batch:   722/  792, ite: 66949] train loss: 4.308197, tar: 0.438263 
l0: 0.382915, l1: 0.387232, l2: 0.382343, l3: 0.396807, l4: 0.429935, l5: 0.591635, l6: 0.773191

[epoch:  74/100, batch:   724/  792, ite: 66950] train loss: 4.308098, tar: 0.438204 
l0: 0.295857, l1: 0.299502, l2: 0.301175, l3: 0.305704, l4: 0.323779, l5: 0.511776, l6: 0.634651

[epoch:  74/100, batch:   726/  792, ite: 66951] train loss: 4.307126, tar: 0.438055 
l0: 0.185503, l1: 0.186810, l2: 0.187057, l3: 0.188424, l4: 0.210095, l5: 0.264986, l6: 0.344930

[epoch:  74/100, batch:   728/  792, ite: 66952] train loss: 4.304683, tar: 0.43

l0: 0.238749, l1: 0.243348, l2: 0.241057, l3: 0.243356, l4: 0.282520, l5: 0.369798, l6: 0.428458

[epoch:  75/100, batch:    16/  792, ite: 66992] train loss: 4.305869, tar: 0.437785 
l0: 0.764633, l1: 0.771815, l2: 0.771536, l3: 0.777262, l4: 0.798290, l5: 0.815803, l6: 1.072363

[epoch:  75/100, batch:    18/  792, ite: 66993] train loss: 4.308443, tar: 0.438115 
l0: 0.240360, l1: 0.239992, l2: 0.238266, l3: 0.243828, l4: 0.265429, l5: 0.291762, l6: 0.407617

[epoch:  75/100, batch:    20/  792, ite: 66994] train loss: 4.306542, tar: 0.437916 
l0: 0.284460, l1: 0.293845, l2: 0.292698, l3: 0.309105, l4: 0.363427, l5: 0.478800, l6: 0.585096

[epoch:  75/100, batch:    22/  792, ite: 66995] train loss: 4.305571, tar: 0.437761 
l0: 0.551880, l1: 0.555160, l2: 0.556449, l3: 0.569539, l4: 0.577004, l5: 0.720898, l6: 0.912377

[epoch:  75/100, batch:    24/  792, ite: 66996] train loss: 4.306551, tar: 0.437876 
l0: 0.239013, l1: 0.246411, l2: 0.247888, l3: 0.257474, l4: 0.308903, l5: 0.4107

[epoch:  75/100, batch:   104/  792, ite: 67036] train loss: 4.319164, tar: 0.439382 
l0: 0.332758, l1: 0.331162, l2: 0.333521, l3: 0.344505, l4: 0.372180, l5: 0.436000, l6: 0.501378

[epoch:  75/100, batch:   106/  792, ite: 67037] train loss: 4.318145, tar: 0.439280 
l0: 0.436867, l1: 0.439166, l2: 0.438794, l3: 0.441115, l4: 0.466972, l5: 0.543260, l6: 0.602581

[epoch:  75/100, batch:   108/  792, ite: 67038] train loss: 4.317830, tar: 0.439277 
l0: 0.257029, l1: 0.260286, l2: 0.259195, l3: 0.271634, l4: 0.318984, l5: 0.444959, l6: 0.720734

[epoch:  75/100, batch:   110/  792, ite: 67039] train loss: 4.316911, tar: 0.439102 
l0: 0.358850, l1: 0.360703, l2: 0.361442, l3: 0.373077, l4: 0.390096, l5: 0.484244, l6: 0.608396

[epoch:  75/100, batch:   112/  792, ite: 67040] train loss: 4.316234, tar: 0.439025 
l0: 0.116840, l1: 0.123665, l2: 0.126742, l3: 0.130433, l4: 0.162933, l5: 0.248388, l6: 0.359333

[epoch:  75/100, batch:   114/  792, ite: 67041] train loss: 4.313669, tar: 0.43

[epoch:  75/100, batch:   194/  792, ite: 67081] train loss: 4.303910, tar: 0.437561 
l0: 0.475632, l1: 0.478043, l2: 0.478113, l3: 0.484326, l4: 0.510686, l5: 0.597292, l6: 0.812804

[epoch:  75/100, batch:   196/  792, ite: 67082] train loss: 4.304288, tar: 0.437597 
l0: 0.274930, l1: 0.272633, l2: 0.272438, l3: 0.275177, l4: 0.299190, l5: 0.436861, l6: 0.544667

[epoch:  75/100, batch:   198/  792, ite: 67083] train loss: 4.303060, tar: 0.437446 
l0: 0.217846, l1: 0.219552, l2: 0.220930, l3: 0.225691, l4: 0.269741, l5: 0.394179, l6: 0.565149

[epoch:  75/100, batch:   200/  792, ite: 67084] train loss: 4.301574, tar: 0.437244 
l0: 0.418378, l1: 0.418311, l2: 0.419670, l3: 0.415446, l4: 0.429764, l5: 0.513503, l6: 0.633668

[epoch:  75/100, batch:   202/  792, ite: 67085] train loss: 4.301189, tar: 0.437226 
l0: 0.321941, l1: 0.321554, l2: 0.323237, l3: 0.326793, l4: 0.353850, l5: 0.435525, l6: 0.642258

[epoch:  75/100, batch:   204/  792, ite: 67086] train loss: 4.300305, tar: 0.43

[epoch:  75/100, batch:   284/  792, ite: 67126] train loss: 4.285451, tar: 0.435199 
l0: 0.381987, l1: 0.382665, l2: 0.378974, l3: 0.382502, l4: 0.387455, l5: 0.474346, l6: 0.672966

[epoch:  75/100, batch:   286/  792, ite: 67127] train loss: 4.285105, tar: 0.435152 
l0: 0.487399, l1: 0.494661, l2: 0.491804, l3: 0.490212, l4: 0.506250, l5: 0.594085, l6: 0.713449

[epoch:  75/100, batch:   288/  792, ite: 67128] train loss: 4.285380, tar: 0.435198 
l0: 0.441134, l1: 0.438279, l2: 0.437366, l3: 0.434902, l4: 0.476423, l5: 0.585628, l6: 0.752359

[epoch:  75/100, batch:   290/  792, ite: 67129] train loss: 4.285552, tar: 0.435204 
l0: 0.310247, l1: 0.316003, l2: 0.314218, l3: 0.323607, l4: 0.372357, l5: 0.572900, l6: 0.761057

[epoch:  75/100, batch:   292/  792, ite: 67130] train loss: 4.285064, tar: 0.435093 
l0: 0.382862, l1: 0.384419, l2: 0.381928, l3: 0.384366, l4: 0.400744, l5: 0.493762, l6: 0.707541

[epoch:  75/100, batch:   294/  792, ite: 67131] train loss: 4.284844, tar: 0.43

[epoch:  75/100, batch:   374/  792, ite: 67171] train loss: 4.289717, tar: 0.436003 
l0: 0.335330, l1: 0.340519, l2: 0.338631, l3: 0.333837, l4: 0.347601, l5: 0.432172, l6: 0.591311

[epoch:  75/100, batch:   376/  792, ite: 67172] train loss: 4.288900, tar: 0.435917 
l0: 0.366011, l1: 0.367228, l2: 0.364936, l3: 0.367722, l4: 0.387939, l5: 0.476687, l6: 0.643952

[epoch:  75/100, batch:   378/  792, ite: 67173] train loss: 4.288384, tar: 0.435857 
l0: 0.466067, l1: 0.473336, l2: 0.472875, l3: 0.481230, l4: 0.513746, l5: 0.590612, l6: 0.785259

[epoch:  75/100, batch:   380/  792, ite: 67174] train loss: 4.288768, tar: 0.435883 
l0: 0.218043, l1: 0.221510, l2: 0.223740, l3: 0.232489, l4: 0.283627, l5: 0.381380, l6: 0.532569

[epoch:  75/100, batch:   382/  792, ite: 67175] train loss: 4.287362, tar: 0.435697 
l0: 0.462688, l1: 0.467735, l2: 0.466729, l3: 0.471867, l4: 0.516196, l5: 0.582936, l6: 0.594454

[epoch:  75/100, batch:   384/  792, ite: 67176] train loss: 4.287273, tar: 0.43

[epoch:  75/100, batch:   464/  792, ite: 67216] train loss: 4.299391, tar: 0.437328 
l0: 0.652174, l1: 0.661656, l2: 0.659375, l3: 0.657356, l4: 0.658762, l5: 0.715181, l6: 0.811757

[epoch:  75/100, batch:   466/  792, ite: 67217] train loss: 4.300625, tar: 0.437505 
l0: 0.208388, l1: 0.206032, l2: 0.205514, l3: 0.208372, l4: 0.246302, l5: 0.391346, l6: 0.493767

[epoch:  75/100, batch:   468/  792, ite: 67218] train loss: 4.299165, tar: 0.437317 
l0: 0.255487, l1: 0.263803, l2: 0.262307, l3: 0.269370, l4: 0.302327, l5: 0.470341, l6: 0.602972

[epoch:  75/100, batch:   470/  792, ite: 67219] train loss: 4.298149, tar: 0.437167 
l0: 0.276062, l1: 0.275846, l2: 0.275701, l3: 0.276382, l4: 0.325122, l5: 0.498226, l6: 0.695196

[epoch:  75/100, batch:   472/  792, ite: 67220] train loss: 4.297368, tar: 0.437035 
l0: 0.592246, l1: 0.595057, l2: 0.597203, l3: 0.602822, l4: 0.624568, l5: 0.752564, l6: 0.997068

[epoch:  75/100, batch:   474/  792, ite: 67221] train loss: 4.298664, tar: 0.43

[epoch:  75/100, batch:   554/  792, ite: 67261] train loss: 4.307771, tar: 0.438294 
l0: 0.368679, l1: 0.370024, l2: 0.374539, l3: 0.376064, l4: 0.390682, l5: 0.473241, l6: 0.546316

[epoch:  75/100, batch:   556/  792, ite: 67262] train loss: 4.307123, tar: 0.438239 
l0: 0.176976, l1: 0.177755, l2: 0.178292, l3: 0.189890, l4: 0.247032, l5: 0.356646, l6: 0.578106

[epoch:  75/100, batch:   558/  792, ite: 67263] train loss: 4.305693, tar: 0.438032 
l0: 0.544798, l1: 0.548987, l2: 0.549460, l3: 0.546874, l4: 0.562733, l5: 0.649678, l6: 0.964176

[epoch:  75/100, batch:   560/  792, ite: 67264] train loss: 4.306624, tar: 0.438117 
l0: 0.550660, l1: 0.553361, l2: 0.553369, l3: 0.558208, l4: 0.588315, l5: 0.702443, l6: 0.835679

[epoch:  75/100, batch:   562/  792, ite: 67265] train loss: 4.307445, tar: 0.438206 
l0: 0.546270, l1: 0.548184, l2: 0.549657, l3: 0.553961, l4: 0.553441, l5: 0.656478, l6: 0.846091

[epoch:  75/100, batch:   564/  792, ite: 67266] train loss: 4.308083, tar: 0.43

[epoch:  75/100, batch:   644/  792, ite: 67306] train loss: 4.312281, tar: 0.438746 
l0: 0.448727, l1: 0.449584, l2: 0.452310, l3: 0.451793, l4: 0.468444, l5: 0.610884, l6: 0.721055

[epoch:  75/100, batch:   646/  792, ite: 67307] train loss: 4.312292, tar: 0.438754 
l0: 0.562578, l1: 0.569133, l2: 0.569085, l3: 0.566788, l4: 0.588500, l5: 0.611628, l6: 0.736287

[epoch:  75/100, batch:   648/  792, ite: 67308] train loss: 4.312798, tar: 0.438848 
l0: 0.398232, l1: 0.398681, l2: 0.402845, l3: 0.405887, l4: 0.415842, l5: 0.503584, l6: 0.693215

[epoch:  75/100, batch:   650/  792, ite: 67309] train loss: 4.312520, tar: 0.438817 
l0: 0.343900, l1: 0.346261, l2: 0.349334, l3: 0.352956, l4: 0.363322, l5: 0.486049, l6: 0.606219

[epoch:  75/100, batch:   652/  792, ite: 67310] train loss: 4.311866, tar: 0.438745 
l0: 0.298801, l1: 0.299225, l2: 0.295885, l3: 0.290920, l4: 0.319553, l5: 0.376139, l6: 0.501888

[epoch:  75/100, batch:   654/  792, ite: 67311] train loss: 4.310791, tar: 0.43

[epoch:  75/100, batch:   734/  792, ite: 67351] train loss: 4.306751, tar: 0.438067 
l0: 0.285312, l1: 0.286203, l2: 0.281618, l3: 0.280225, l4: 0.305313, l5: 0.419451, l6: 0.549668

[epoch:  75/100, batch:   736/  792, ite: 67352] train loss: 4.305741, tar: 0.437954 
l0: 0.447839, l1: 0.457209, l2: 0.456376, l3: 0.462893, l4: 0.465739, l5: 0.555093, l6: 0.723579

[epoch:  75/100, batch:   738/  792, ite: 67353] train loss: 4.305782, tar: 0.437962 
l0: 0.316859, l1: 0.315018, l2: 0.316395, l3: 0.320108, l4: 0.361775, l5: 0.484297, l6: 0.583288

[epoch:  75/100, batch:   740/  792, ite: 67354] train loss: 4.305068, tar: 0.437872 
l0: 0.635464, l1: 0.643132, l2: 0.641728, l3: 0.652146, l4: 0.660078, l5: 0.724011, l6: 0.972931

[epoch:  75/100, batch:   742/  792, ite: 67355] train loss: 4.306408, tar: 0.438018 
l0: 0.347713, l1: 0.347782, l2: 0.345991, l3: 0.350222, l4: 0.356991, l5: 0.509632, l6: 0.699452

[epoch:  75/100, batch:   744/  792, ite: 67356] train loss: 4.305939, tar: 0.43

l0: 0.481922, l1: 0.492180, l2: 0.494297, l3: 0.497993, l4: 0.491332, l5: 0.504767, l6: 0.593122

[epoch:  76/100, batch:    32/  792, ite: 67396] train loss: 4.310688, tar: 0.438604 
l0: 0.486948, l1: 0.488703, l2: 0.490913, l3: 0.493705, l4: 0.513971, l5: 0.528654, l6: 0.603345

[epoch:  76/100, batch:    34/  792, ite: 67397] train loss: 4.310658, tar: 0.438639 
l0: 0.616469, l1: 0.617073, l2: 0.617719, l3: 0.614774, l4: 0.618184, l5: 0.694115, l6: 0.855940

[epoch:  76/100, batch:    36/  792, ite: 67398] train loss: 4.311601, tar: 0.438766 
l0: 0.490742, l1: 0.492570, l2: 0.495126, l3: 0.503066, l4: 0.543820, l5: 0.668856, l6: 0.773067

[epoch:  76/100, batch:    38/  792, ite: 67399] train loss: 4.311896, tar: 0.438803 
l0: 0.192520, l1: 0.202868, l2: 0.201496, l3: 0.204019, l4: 0.223994, l5: 0.321437, l6: 0.425750

[epoch:  76/100, batch:    40/  792, ite: 67400] train loss: 4.310478, tar: 0.438627 
l0: 0.480729, l1: 0.482822, l2: 0.482965, l3: 0.485910, l4: 0.512202, l5: 0.5595

[epoch:  76/100, batch:   120/  792, ite: 67440] train loss: 4.304821, tar: 0.437792 
l0: 0.263514, l1: 0.275614, l2: 0.278385, l3: 0.289517, l4: 0.317689, l5: 0.386185, l6: 0.463248

[epoch:  76/100, batch:   122/  792, ite: 67441] train loss: 4.303840, tar: 0.437672 
l0: 0.745144, l1: 0.764695, l2: 0.747616, l3: 0.720792, l4: 0.713419, l5: 0.790141, l6: 0.766099

[epoch:  76/100, batch:   124/  792, ite: 67442] train loss: 4.305134, tar: 0.437885 
l0: 0.801756, l1: 0.806992, l2: 0.811634, l3: 0.821567, l4: 0.817660, l5: 0.809329, l6: 0.975043

[epoch:  76/100, batch:   126/  792, ite: 67443] train loss: 4.306922, tar: 0.438137 
l0: 0.267644, l1: 0.267860, l2: 0.270364, l3: 0.274676, l4: 0.304797, l5: 0.413466, l6: 0.602237

[epoch:  76/100, batch:   128/  792, ite: 67444] train loss: 4.306130, tar: 0.438019 
l0: 0.321752, l1: 0.319398, l2: 0.320513, l3: 0.321257, l4: 0.364644, l5: 0.426259, l6: 0.540848

[epoch:  76/100, batch:   130/  792, ite: 67445] train loss: 4.305308, tar: 0.43

[epoch:  76/100, batch:   210/  792, ite: 67485] train loss: 4.300328, tar: 0.437162 
l0: 0.540819, l1: 0.551856, l2: 0.547233, l3: 0.540720, l4: 0.551156, l5: 0.596166, l6: 0.709616

[epoch:  76/100, batch:   212/  792, ite: 67486] train loss: 4.300617, tar: 0.437232 
l0: 0.415634, l1: 0.424668, l2: 0.422616, l3: 0.419438, l4: 0.440463, l5: 0.525754, l6: 0.684193

[epoch:  76/100, batch:   214/  792, ite: 67487] train loss: 4.300451, tar: 0.437217 
l0: 0.277722, l1: 0.284044, l2: 0.281877, l3: 0.290367, l4: 0.305409, l5: 0.432392, l6: 0.595012

[epoch:  76/100, batch:   216/  792, ite: 67488] train loss: 4.299640, tar: 0.437110 
l0: 1.066885, l1: 1.076331, l2: 1.077082, l3: 1.099005, l4: 1.120072, l5: 1.107459, l6: 1.282232

[epoch:  76/100, batch:   218/  792, ite: 67489] train loss: 4.302941, tar: 0.437533 
l0: 0.254812, l1: 0.258846, l2: 0.258225, l3: 0.259850, l4: 0.276983, l5: 0.380576, l6: 0.479874

[epoch:  76/100, batch:   220/  792, ite: 67490] train loss: 4.301837, tar: 0.43

[epoch:  76/100, batch:   300/  792, ite: 67530] train loss: 4.303083, tar: 0.437409 
l0: 0.725547, l1: 0.725665, l2: 0.726520, l3: 0.729987, l4: 0.725339, l5: 0.708482, l6: 0.792867

[epoch:  76/100, batch:   302/  792, ite: 67531] train loss: 4.304153, tar: 0.437597 
l0: 0.254933, l1: 0.254203, l2: 0.251153, l3: 0.254825, l4: 0.261383, l5: 0.321114, l6: 0.494525

[epoch:  76/100, batch:   304/  792, ite: 67532] train loss: 4.303007, tar: 0.437478 
l0: 0.202373, l1: 0.216899, l2: 0.215322, l3: 0.215186, l4: 0.227427, l5: 0.385765, l6: 0.556521

[epoch:  76/100, batch:   306/  792, ite: 67533] train loss: 4.301927, tar: 0.437325 
l0: 0.576519, l1: 0.576127, l2: 0.576092, l3: 0.579245, l4: 0.588520, l5: 0.705349, l6: 0.961023

[epoch:  76/100, batch:   308/  792, ite: 67534] train loss: 4.302739, tar: 0.437416 
l0: 0.812287, l1: 0.819888, l2: 0.824445, l3: 0.832610, l4: 0.865664, l5: 0.903745, l6: 0.994425

[epoch:  76/100, batch:   310/  792, ite: 67535] train loss: 4.304635, tar: 0.43

[epoch:  76/100, batch:   390/  792, ite: 67575] train loss: 4.309762, tar: 0.438123 
l0: 0.247517, l1: 0.253543, l2: 0.253357, l3: 0.263378, l4: 0.288234, l5: 0.403238, l6: 0.612461

[epoch:  76/100, batch:   392/  792, ite: 67576] train loss: 4.308889, tar: 0.438003 
l0: 0.267556, l1: 0.272124, l2: 0.271766, l3: 0.274627, l4: 0.310688, l5: 0.444171, l6: 0.543693

[epoch:  76/100, batch:   394/  792, ite: 67577] train loss: 4.308073, tar: 0.437894 
l0: 0.256019, l1: 0.255631, l2: 0.254965, l3: 0.263261, l4: 0.290172, l5: 0.369271, l6: 0.449473

[epoch:  76/100, batch:   396/  792, ite: 67578] train loss: 4.306993, tar: 0.437779 
l0: 0.823731, l1: 0.828865, l2: 0.826908, l3: 0.831429, l4: 0.880717, l5: 0.961141, l6: 1.357907

[epoch:  76/100, batch:   398/  792, ite: 67579] train loss: 4.309163, tar: 0.438024 
l0: 0.411798, l1: 0.414518, l2: 0.415467, l3: 0.414973, l4: 0.453271, l5: 0.601341, l6: 0.817582

[epoch:  76/100, batch:   400/  792, ite: 67580] train loss: 4.309195, tar: 0.43

[epoch:  76/100, batch:   480/  792, ite: 67620] train loss: 4.309740, tar: 0.437822 
l0: 0.305126, l1: 0.309722, l2: 0.306683, l3: 0.311676, l4: 0.354174, l5: 0.499968, l6: 0.725928

[epoch:  76/100, batch:   482/  792, ite: 67621] train loss: 4.309235, tar: 0.437740 
l0: 0.523532, l1: 0.527556, l2: 0.528385, l3: 0.529001, l4: 0.571353, l5: 0.657475, l6: 0.722447

[epoch:  76/100, batch:   484/  792, ite: 67622] train loss: 4.309559, tar: 0.437793 
l0: 0.477719, l1: 0.478322, l2: 0.475922, l3: 0.480155, l4: 0.510649, l5: 0.648593, l6: 0.813531

[epoch:  76/100, batch:   486/  792, ite: 67623] train loss: 4.309822, tar: 0.437817 
l0: 0.334801, l1: 0.337257, l2: 0.339055, l3: 0.355911, l4: 0.411043, l5: 0.484053, l6: 0.613782

[epoch:  76/100, batch:   488/  792, ite: 67624] train loss: 4.309432, tar: 0.437754 
l0: 0.342404, l1: 0.344179, l2: 0.341712, l3: 0.340234, l4: 0.380427, l5: 0.488082, l6: 0.828069

[epoch:  76/100, batch:   490/  792, ite: 67625] train loss: 4.309199, tar: 0.43

[epoch:  76/100, batch:   570/  792, ite: 67665] train loss: 4.302106, tar: 0.436847 
l0: 0.257013, l1: 0.259940, l2: 0.249265, l3: 0.241692, l4: 0.260253, l5: 0.383285, l6: 0.527076

[epoch:  76/100, batch:   572/  792, ite: 67666] train loss: 4.301141, tar: 0.436739 
l0: 0.372588, l1: 0.374231, l2: 0.371299, l3: 0.373099, l4: 0.395697, l5: 0.437168, l6: 0.434576

[epoch:  76/100, batch:   574/  792, ite: 67667] train loss: 4.300507, tar: 0.436701 
l0: 0.268395, l1: 0.268990, l2: 0.269101, l3: 0.277570, l4: 0.316003, l5: 0.396253, l6: 0.592599

[epoch:  76/100, batch:   576/  792, ite: 67668] train loss: 4.299749, tar: 0.436600 
l0: 0.343040, l1: 0.341032, l2: 0.340313, l3: 0.349138, l4: 0.356568, l5: 0.442541, l6: 0.471015

[epoch:  76/100, batch:   578/  792, ite: 67669] train loss: 4.299059, tar: 0.436544 
l0: 0.416456, l1: 0.419151, l2: 0.417774, l3: 0.416290, l4: 0.430862, l5: 0.497076, l6: 0.768904

[epoch:  76/100, batch:   580/  792, ite: 67670] train loss: 4.298983, tar: 0.43

[epoch:  76/100, batch:   660/  792, ite: 67710] train loss: 4.293217, tar: 0.436022 
l0: 0.664175, l1: 0.670492, l2: 0.673199, l3: 0.669659, l4: 0.705081, l5: 0.799678, l6: 0.988789

[epoch:  76/100, batch:   662/  792, ite: 67711] train loss: 4.294370, tar: 0.436156 
l0: 0.409141, l1: 0.414670, l2: 0.410527, l3: 0.406566, l4: 0.411127, l5: 0.506211, l6: 0.654437

[epoch:  76/100, batch:   664/  792, ite: 67712] train loss: 4.294145, tar: 0.436140 
l0: 0.312784, l1: 0.312283, l2: 0.313771, l3: 0.313801, l4: 0.335947, l5: 0.494251, l6: 0.736752

[epoch:  76/100, batch:   666/  792, ite: 67713] train loss: 4.293781, tar: 0.436068 
l0: 0.818736, l1: 0.824069, l2: 0.820286, l3: 0.815047, l4: 0.840157, l5: 0.929717, l6: 1.073932

[epoch:  76/100, batch:   668/  792, ite: 67714] train loss: 4.295562, tar: 0.436291 
l0: 0.457406, l1: 0.453514, l2: 0.455355, l3: 0.461635, l4: 0.495562, l5: 0.570420, l6: 0.800745

[epoch:  76/100, batch:   670/  792, ite: 67715] train loss: 4.295714, tar: 0.43

[epoch:  76/100, batch:   750/  792, ite: 67755] train loss: 4.289267, tar: 0.435537 
l0: 0.318093, l1: 0.321810, l2: 0.322331, l3: 0.317863, l4: 0.354963, l5: 0.450941, l6: 0.671194

[epoch:  76/100, batch:   752/  792, ite: 67756] train loss: 4.288817, tar: 0.435470 
l0: 0.923303, l1: 0.929234, l2: 0.928602, l3: 0.933486, l4: 0.950015, l5: 1.053285, l6: 1.136975

[epoch:  76/100, batch:   754/  792, ite: 67757] train loss: 4.290963, tar: 0.435748 
l0: 0.622908, l1: 0.625430, l2: 0.624851, l3: 0.629197, l4: 0.633826, l5: 0.604461, l6: 0.791868

[epoch:  76/100, batch:   756/  792, ite: 67758] train loss: 4.291638, tar: 0.435854 
l0: 0.287069, l1: 0.291818, l2: 0.291618, l3: 0.295846, l4: 0.314709, l5: 0.452534, l6: 0.629667

[epoch:  76/100, batch:   758/  792, ite: 67759] train loss: 4.291046, tar: 0.435769 
l0: 0.163219, l1: 0.170721, l2: 0.173259, l3: 0.178441, l4: 0.198049, l5: 0.316125, l6: 0.399398

[epoch:  76/100, batch:   760/  792, ite: 67760] train loss: 4.289761, tar: 0.43

l0: 0.272388, l1: 0.277556, l2: 0.278736, l3: 0.290962, l4: 0.327740, l5: 0.400469, l6: 0.537263

[epoch:  77/100, batch:    48/  792, ite: 67800] train loss: 4.282644, tar: 0.434809 
l0: 0.298688, l1: 0.303940, l2: 0.302360, l3: 0.312272, l4: 0.341601, l5: 0.438758, l6: 0.582036

[epoch:  77/100, batch:    50/  792, ite: 67801] train loss: 4.282045, tar: 0.434734 
l0: 0.397266, l1: 0.405144, l2: 0.405888, l3: 0.408112, l4: 0.438478, l5: 0.562536, l6: 0.820653

[epoch:  77/100, batch:    52/  792, ite: 67802] train loss: 4.281979, tar: 0.434713 
l0: 0.390813, l1: 0.390351, l2: 0.393244, l3: 0.389350, l4: 0.392429, l5: 0.445916, l6: 0.565747

[epoch:  77/100, batch:    54/  792, ite: 67803] train loss: 4.281625, tar: 0.434689 
l0: 0.480308, l1: 0.474879, l2: 0.475017, l3: 0.472545, l4: 0.494020, l5: 0.569289, l6: 0.664660

[epoch:  77/100, batch:    56/  792, ite: 67804] train loss: 4.281690, tar: 0.434714 
l0: 1.010612, l1: 1.051168, l2: 1.041104, l3: 1.039964, l4: 1.082179, l5: 1.0764

[epoch:  77/100, batch:   136/  792, ite: 67844] train loss: 4.286057, tar: 0.435260 
l0: 0.510425, l1: 0.509906, l2: 0.511535, l3: 0.507862, l4: 0.493783, l5: 0.554237, l6: 0.699190

[epoch:  77/100, batch:   138/  792, ite: 67845] train loss: 4.286188, tar: 0.435301 
l0: 0.376667, l1: 0.375150, l2: 0.373890, l3: 0.382646, l4: 0.429050, l5: 0.607697, l6: 0.744413

[epoch:  77/100, batch:   140/  792, ite: 67846] train loss: 4.286102, tar: 0.435269 
l0: 0.399213, l1: 0.401449, l2: 0.402009, l3: 0.402273, l4: 0.427094, l5: 0.489418, l6: 0.615341

[epoch:  77/100, batch:   142/  792, ite: 67847] train loss: 4.285853, tar: 0.435250 
l0: 0.237215, l1: 0.244236, l2: 0.241594, l3: 0.248773, l4: 0.254197, l5: 0.329207, l6: 0.453065

[epoch:  77/100, batch:   144/  792, ite: 67848] train loss: 4.284909, tar: 0.435143 
l0: 0.414451, l1: 0.419430, l2: 0.416616, l3: 0.427769, l4: 0.463096, l5: 0.556041, l6: 0.733355

[epoch:  77/100, batch:   146/  792, ite: 67849] train loss: 4.284913, tar: 0.43

[epoch:  77/100, batch:   226/  792, ite: 67889] train loss: 4.284069, tar: 0.434853 
l0: 1.002716, l1: 0.997690, l2: 0.991435, l3: 1.009441, l4: 1.063818, l5: 1.140391, l6: 1.228249

[epoch:  77/100, batch:   228/  792, ite: 67890] train loss: 4.286472, tar: 0.435154 
l0: 0.523077, l1: 0.526069, l2: 0.527771, l3: 0.535091, l4: 0.572720, l5: 0.635927, l6: 0.678805

[epoch:  77/100, batch:   230/  792, ite: 67891] train loss: 4.286722, tar: 0.435200 
l0: 0.457572, l1: 0.455027, l2: 0.446033, l3: 0.446664, l4: 0.461561, l5: 0.570149, l6: 0.610307

[epoch:  77/100, batch:   232/  792, ite: 67892] train loss: 4.286612, tar: 0.435212 
l0: 0.223141, l1: 0.225460, l2: 0.227665, l3: 0.228428, l4: 0.238274, l5: 0.370803, l6: 0.427414

[epoch:  77/100, batch:   234/  792, ite: 67893] train loss: 4.285582, tar: 0.435100 
l0: 0.395399, l1: 0.396740, l2: 0.395986, l3: 0.400320, l4: 0.420831, l5: 0.534023, l6: 0.676997

[epoch:  77/100, batch:   236/  792, ite: 67894] train loss: 4.285429, tar: 0.43

[epoch:  77/100, batch:   316/  792, ite: 67934] train loss: 4.288400, tar: 0.435481 
l0: 0.199225, l1: 0.201653, l2: 0.204058, l3: 0.213198, l4: 0.248393, l5: 0.329099, l6: 0.484842

[epoch:  77/100, batch:   318/  792, ite: 67935] train loss: 4.287457, tar: 0.435358 
l0: 0.500082, l1: 0.503127, l2: 0.501986, l3: 0.502537, l4: 0.540191, l5: 0.644531, l6: 0.789496

[epoch:  77/100, batch:   320/  792, ite: 67936] train loss: 4.287789, tar: 0.435392 
l0: 0.405798, l1: 0.413793, l2: 0.415371, l3: 0.424012, l4: 0.442878, l5: 0.545460, l6: 0.730456

[epoch:  77/100, batch:   322/  792, ite: 67937] train loss: 4.287719, tar: 0.435377 
l0: 0.212933, l1: 0.217221, l2: 0.218017, l3: 0.223590, l4: 0.275950, l5: 0.459183, l6: 0.551671

[epoch:  77/100, batch:   324/  792, ite: 67938] train loss: 4.286956, tar: 0.435262 
l0: 0.259905, l1: 0.263300, l2: 0.267843, l3: 0.270625, l4: 0.310338, l5: 0.371223, l6: 0.538757

[epoch:  77/100, batch:   326/  792, ite: 67939] train loss: 4.286200, tar: 0.43

[epoch:  77/100, batch:   406/  792, ite: 67979] train loss: 4.286405, tar: 0.435163 
l0: 0.412464, l1: 0.414941, l2: 0.415885, l3: 0.416526, l4: 0.442184, l5: 0.529935, l6: 0.678059

[epoch:  77/100, batch:   408/  792, ite: 67980] train loss: 4.286302, tar: 0.435152 
l0: 0.438708, l1: 0.444299, l2: 0.443113, l3: 0.447231, l4: 0.481014, l5: 0.517089, l6: 0.836731

[epoch:  77/100, batch:   410/  792, ite: 67981] train loss: 4.286386, tar: 0.435154 
l0: 0.409743, l1: 0.413400, l2: 0.413995, l3: 0.408593, l4: 0.421185, l5: 0.501651, l6: 0.705418

[epoch:  77/100, batch:   412/  792, ite: 67982] train loss: 4.286237, tar: 0.435141 
l0: 0.660015, l1: 0.655548, l2: 0.652586, l3: 0.656432, l4: 0.671719, l5: 0.769211, l6: 0.937985

[epoch:  77/100, batch:   414/  792, ite: 67983] train loss: 4.287094, tar: 0.435254 
l0: 0.367621, l1: 0.368586, l2: 0.368048, l3: 0.371340, l4: 0.380047, l5: 0.432326, l6: 0.541164

[epoch:  77/100, batch:   416/  792, ite: 67984] train loss: 4.286704, tar: 0.43

[epoch:  77/100, batch:   496/  792, ite: 68024] train loss: 4.430035, tar: 0.454047 
l0: 0.358891, l1: 0.362964, l2: 0.362811, l3: 0.364232, l4: 0.382079, l5: 0.491616, l6: 0.602846

[epoch:  77/100, batch:   498/  792, ite: 68025] train loss: 4.396708, tar: 0.450240 
l0: 0.414319, l1: 0.413261, l2: 0.411584, l3: 0.418136, l4: 0.434381, l5: 0.550221, l6: 0.706396

[epoch:  77/100, batch:   500/  792, ite: 68026] train loss: 4.387665, tar: 0.448859 
l0: 0.306910, l1: 0.314815, l2: 0.315286, l3: 0.318094, l4: 0.349500, l5: 0.450711, l6: 0.688650

[epoch:  77/100, batch:   502/  792, ite: 68027] train loss: 4.356483, tar: 0.443601 
l0: 0.348750, l1: 0.354805, l2: 0.356256, l3: 0.360298, l4: 0.371494, l5: 0.401909, l6: 0.579585

[epoch:  77/100, batch:   504/  792, ite: 68028] train loss: 4.321425, tar: 0.440214 
l0: 0.364040, l1: 0.361782, l2: 0.360234, l3: 0.373382, l4: 0.385951, l5: 0.490522, l6: 0.723747

[epoch:  77/100, batch:   506/  792, ite: 68029] train loss: 4.305571, tar: 0.43

[epoch:  77/100, batch:   586/  792, ite: 68069] train loss: 4.285620, tar: 0.434201 
l0: 0.332932, l1: 0.334576, l2: 0.333394, l3: 0.329267, l4: 0.341817, l5: 0.366585, l6: 0.530705

[epoch:  77/100, batch:   588/  792, ite: 68070] train loss: 4.269548, tar: 0.432754 
l0: 0.547577, l1: 0.548899, l2: 0.543947, l3: 0.537058, l4: 0.581500, l5: 0.695876, l6: 0.992749

[epoch:  77/100, batch:   590/  792, ite: 68071] train loss: 4.286894, tar: 0.434372 
l0: 0.259948, l1: 0.265821, l2: 0.261656, l3: 0.256187, l4: 0.280911, l5: 0.321969, l6: 0.492662

[epoch:  77/100, batch:   592/  792, ite: 68072] train loss: 4.265504, tar: 0.431949 
l0: 0.631693, l1: 0.637711, l2: 0.630053, l3: 0.624855, l4: 0.674649, l5: 0.824906, l6: 0.959722

[epoch:  77/100, batch:   594/  792, ite: 68073] train loss: 4.291191, tar: 0.434685 
l0: 0.444789, l1: 0.447058, l2: 0.447117, l3: 0.445972, l4: 0.456508, l5: 0.576790, l6: 0.857895

[epoch:  77/100, batch:   596/  792, ite: 68074] train loss: 4.294075, tar: 0.43

[epoch:  77/100, batch:   676/  792, ite: 68114] train loss: 4.322572, tar: 0.439324 
l0: 0.733446, l1: 0.734925, l2: 0.736747, l3: 0.732052, l4: 0.735820, l5: 0.843373, l6: 0.884835

[epoch:  77/100, batch:   678/  792, ite: 68115] train loss: 4.341352, tar: 0.441882 
l0: 0.416334, l1: 0.418276, l2: 0.421111, l3: 0.421103, l4: 0.444537, l5: 0.547320, l6: 0.751870

[epoch:  77/100, batch:   680/  792, ite: 68116] train loss: 4.340664, tar: 0.441662 
l0: 0.246226, l1: 0.245870, l2: 0.250472, l3: 0.271376, l4: 0.295241, l5: 0.409489, l6: 0.582802

[epoch:  77/100, batch:   682/  792, ite: 68117] train loss: 4.328386, tar: 0.439991 
l0: 0.572833, l1: 0.571733, l2: 0.568814, l3: 0.576770, l4: 0.643620, l5: 0.853728, l6: 1.102213

[epoch:  77/100, batch:   684/  792, ite: 68118] train loss: 4.342858, tar: 0.441117 
l0: 0.564024, l1: 0.560031, l2: 0.566100, l3: 0.562560, l4: 0.576709, l5: 0.652291, l6: 0.846230

[epoch:  77/100, batch:   686/  792, ite: 68119] train loss: 4.349982, tar: 0.44

[epoch:  77/100, batch:   766/  792, ite: 68159] train loss: 4.350627, tar: 0.443533 
l0: 0.425002, l1: 0.423399, l2: 0.424070, l3: 0.421869, l4: 0.444829, l5: 0.536406, l6: 0.673706

[epoch:  77/100, batch:   768/  792, ite: 68160] train loss: 4.349230, tar: 0.443417 
l0: 0.327329, l1: 0.327374, l2: 0.329255, l3: 0.326641, l4: 0.340691, l5: 0.461970, l6: 0.610708

[epoch:  77/100, batch:   770/  792, ite: 68161] train loss: 4.343521, tar: 0.442696 
l0: 0.636906, l1: 0.627195, l2: 0.627122, l3: 0.623786, l4: 0.665583, l5: 0.659928, l6: 0.789694

[epoch:  77/100, batch:   772/  792, ite: 68162] train loss: 4.350570, tar: 0.443895 
l0: 0.543998, l1: 0.555705, l2: 0.556449, l3: 0.562210, l4: 0.559522, l5: 0.559928, l6: 0.677849

[epoch:  77/100, batch:   774/  792, ite: 68163] train loss: 4.352729, tar: 0.444509 
l0: 0.197234, l1: 0.203972, l2: 0.198718, l3: 0.200667, l4: 0.252187, l5: 0.422211, l6: 0.526180

[epoch:  77/100, batch:   776/  792, ite: 68164] train loss: 4.342000, tar: 0.44

l0: 0.435816, l1: 0.438459, l2: 0.438623, l3: 0.445923, l4: 0.476653, l5: 0.538679, l6: 0.793662

[epoch:  78/100, batch:    64/  792, ite: 68204] train loss: 4.404792, tar: 0.451630 
l0: 0.564358, l1: 0.563640, l2: 0.561859, l3: 0.570623, l4: 0.594896, l5: 0.620246, l6: 0.733827

[epoch:  78/100, batch:    66/  792, ite: 68205] train loss: 4.407863, tar: 0.452180 
l0: 0.423454, l1: 0.419587, l2: 0.421513, l3: 0.411339, l4: 0.419662, l5: 0.491733, l6: 0.583163

[epoch:  78/100, batch:    68/  792, ite: 68206] train loss: 4.404782, tar: 0.452040 
l0: 0.593045, l1: 0.598270, l2: 0.594904, l3: 0.598852, l4: 0.626635, l5: 0.672312, l6: 0.905746

[epoch:  78/100, batch:    70/  792, ite: 68207] train loss: 4.410782, tar: 0.452721 
l0: 0.322430, l1: 0.325036, l2: 0.325303, l3: 0.329745, l4: 0.360954, l5: 0.493588, l6: 0.720743

[epoch:  78/100, batch:    72/  792, ite: 68208] train loss: 4.406691, tar: 0.452095 
l0: 0.482170, l1: 0.484174, l2: 0.483394, l3: 0.481205, l4: 0.519913, l5: 0.5951

[epoch:  78/100, batch:   152/  792, ite: 68248] train loss: 4.378274, tar: 0.448630 
l0: 0.265684, l1: 0.268669, l2: 0.273063, l3: 0.287029, l4: 0.328036, l5: 0.500000, l6: 0.683953

[epoch:  78/100, batch:   154/  792, ite: 68249] train loss: 4.374127, tar: 0.447896 
l0: 0.324316, l1: 0.324617, l2: 0.325126, l3: 0.325052, l4: 0.375688, l5: 0.447296, l6: 0.587850

[epoch:  78/100, batch:   156/  792, ite: 68250] train loss: 4.370046, tar: 0.447401 
l0: 0.214340, l1: 0.213548, l2: 0.216393, l3: 0.219968, l4: 0.236732, l5: 0.377360, l6: 0.505298

[epoch:  78/100, batch:   158/  792, ite: 68251] train loss: 4.362716, tar: 0.446473 
l0: 0.593879, l1: 0.605494, l2: 0.605891, l3: 0.609605, l4: 0.635043, l5: 0.720666, l6: 0.945086

[epoch:  78/100, batch:   160/  792, ite: 68252] train loss: 4.368061, tar: 0.447058 
l0: 0.308617, l1: 0.308907, l2: 0.309531, l3: 0.308391, l4: 0.323518, l5: 0.414161, l6: 0.505848

[epoch:  78/100, batch:   162/  792, ite: 68253] train loss: 4.362542, tar: 0.44

[epoch:  78/100, batch:   242/  792, ite: 68293] train loss: 4.375402, tar: 0.448090 
l0: 0.321446, l1: 0.324819, l2: 0.326617, l3: 0.335270, l4: 0.366113, l5: 0.424169, l6: 0.626875

[epoch:  78/100, batch:   244/  792, ite: 68294] train loss: 4.372257, tar: 0.447659 
l0: 0.247070, l1: 0.250686, l2: 0.249176, l3: 0.246282, l4: 0.265874, l5: 0.315890, l6: 0.438103

[epoch:  78/100, batch:   246/  792, ite: 68295] train loss: 4.365751, tar: 0.446979 
l0: 0.216989, l1: 0.219745, l2: 0.216649, l3: 0.224774, l4: 0.250250, l5: 0.396769, l6: 0.532163

[epoch:  78/100, batch:   248/  792, ite: 68296] train loss: 4.359838, tar: 0.446202 
l0: 0.561184, l1: 0.563308, l2: 0.564060, l3: 0.577229, l4: 0.602093, l5: 0.683815, l6: 0.863106

[epoch:  78/100, batch:   250/  792, ite: 68297] train loss: 4.363103, tar: 0.446589 
l0: 0.449271, l1: 0.463440, l2: 0.465566, l3: 0.471233, l4: 0.467784, l5: 0.526398, l6: 0.689350

[epoch:  78/100, batch:   252/  792, ite: 68298] train loss: 4.363177, tar: 0.44

[epoch:  78/100, batch:   332/  792, ite: 68338] train loss: 4.333706, tar: 0.442874 
l0: 0.217342, l1: 0.218253, l2: 0.218437, l3: 0.225708, l4: 0.241559, l5: 0.355485, l6: 0.491014

[epoch:  78/100, batch:   334/  792, ite: 68339] train loss: 4.328117, tar: 0.442209 
l0: 0.313888, l1: 0.311945, l2: 0.313188, l3: 0.316910, l4: 0.327402, l5: 0.439758, l6: 0.524608

[epoch:  78/100, batch:   336/  792, ite: 68340] train loss: 4.324672, tar: 0.441832 
l0: 0.269640, l1: 0.270305, l2: 0.270958, l3: 0.282598, l4: 0.289061, l5: 0.409567, l6: 0.601579

[epoch:  78/100, batch:   338/  792, ite: 68341] train loss: 4.320748, tar: 0.441327 
l0: 0.378111, l1: 0.386568, l2: 0.389503, l3: 0.392873, l4: 0.396378, l5: 0.512256, l6: 0.786044

[epoch:  78/100, batch:   340/  792, ite: 68342] train loss: 4.320115, tar: 0.441142 
l0: 0.356494, l1: 0.350158, l2: 0.352939, l3: 0.355625, l4: 0.363381, l5: 0.489717, l6: 0.611583

[epoch:  78/100, batch:   342/  792, ite: 68343] train loss: 4.317888, tar: 0.44

[epoch:  78/100, batch:   422/  792, ite: 68383] train loss: 4.295738, tar: 0.437875 
l0: 0.562556, l1: 0.567440, l2: 0.569058, l3: 0.560819, l4: 0.588055, l5: 0.722276, l6: 0.902970

[epoch:  78/100, batch:   424/  792, ite: 68384] train loss: 4.299015, tar: 0.438199 
l0: 0.271917, l1: 0.270280, l2: 0.276752, l3: 0.282223, l4: 0.303415, l5: 0.412188, l6: 0.558412

[epoch:  78/100, batch:   426/  792, ite: 68385] train loss: 4.295727, tar: 0.437767 
l0: 0.575068, l1: 0.579742, l2: 0.578262, l3: 0.578107, l4: 0.621631, l5: 0.810841, l6: 1.003723

[epoch:  78/100, batch:   428/  792, ite: 68386] train loss: 4.299802, tar: 0.438123 
l0: 0.464461, l1: 0.469135, l2: 0.472057, l3: 0.460212, l4: 0.458665, l5: 0.520760, l6: 0.669841

[epoch:  78/100, batch:   430/  792, ite: 68387] train loss: 4.299721, tar: 0.438191 
l0: 0.687151, l1: 0.672383, l2: 0.665845, l3: 0.665722, l4: 0.693202, l5: 0.791574, l6: 0.876763

[epoch:  78/100, batch:   432/  792, ite: 68388] train loss: 4.304322, tar: 0.43

[epoch:  78/100, batch:   512/  792, ite: 68428] train loss: 4.295890, tar: 0.436736 
l0: 0.356447, l1: 0.359807, l2: 0.360234, l3: 0.360074, l4: 0.390696, l5: 0.538537, l6: 0.673793

[epoch:  78/100, batch:   514/  792, ite: 68429] train loss: 4.294584, tar: 0.436548 
l0: 0.351457, l1: 0.352328, l2: 0.354059, l3: 0.356599, l4: 0.367122, l5: 0.354958, l6: 0.445007

[epoch:  78/100, batch:   516/  792, ite: 68430] train loss: 4.291967, tar: 0.436351 
l0: 0.288973, l1: 0.294426, l2: 0.298090, l3: 0.296726, l4: 0.327737, l5: 0.498424, l6: 0.748149

[epoch:  78/100, batch:   518/  792, ite: 68431] train loss: 4.290352, tar: 0.436009 
l0: 0.398394, l1: 0.395477, l2: 0.395063, l3: 0.396595, l4: 0.403437, l5: 0.434217, l6: 0.713129

[epoch:  78/100, batch:   520/  792, ite: 68432] train loss: 4.289449, tar: 0.435922 
l0: 0.401732, l1: 0.401533, l2: 0.402243, l3: 0.414903, l4: 0.429181, l5: 0.562994, l6: 0.701815

[epoch:  78/100, batch:   522/  792, ite: 68433] train loss: 4.288985, tar: 0.43

[epoch:  78/100, batch:   602/  792, ite: 68473] train loss: 4.293728, tar: 0.435771 
l0: 1.143107, l1: 1.140643, l2: 1.142939, l3: 1.138539, l4: 1.125034, l5: 1.165361, l6: 1.205359

[epoch:  78/100, batch:   604/  792, ite: 68474] train loss: 4.304694, tar: 0.437263 
l0: 0.325919, l1: 0.327345, l2: 0.327340, l3: 0.330452, l4: 0.355435, l5: 0.425039, l6: 0.597339

[epoch:  78/100, batch:   606/  792, ite: 68475] train loss: 4.302624, tar: 0.437028 
l0: 0.194396, l1: 0.199760, l2: 0.202715, l3: 0.207791, l4: 0.240074, l5: 0.340192, l6: 0.488988

[epoch:  78/100, batch:   608/  792, ite: 68476] train loss: 4.298460, tar: 0.436519 
l0: 0.296467, l1: 0.297088, l2: 0.298116, l3: 0.303907, l4: 0.331272, l5: 0.474197, l6: 0.727967

[epoch:  78/100, batch:   610/  792, ite: 68477] train loss: 4.297000, tar: 0.436225 
l0: 0.468185, l1: 0.469364, l2: 0.469827, l3: 0.466838, l4: 0.461854, l5: 0.554434, l6: 0.758821

[epoch:  78/100, batch:   612/  792, ite: 68478] train loss: 4.297267, tar: 0.43

[epoch:  78/100, batch:   692/  792, ite: 68518] train loss: 4.280613, tar: 0.434241 
l0: 0.306702, l1: 0.310927, l2: 0.309953, l3: 0.314168, l4: 0.326510, l5: 0.408808, l6: 0.549061

[epoch:  78/100, batch:   694/  792, ite: 68519] train loss: 4.278361, tar: 0.433995 
l0: 0.161393, l1: 0.166193, l2: 0.166327, l3: 0.170733, l4: 0.186242, l5: 0.274950, l6: 0.363720

[epoch:  78/100, batch:   696/  792, ite: 68520] train loss: 4.273751, tar: 0.433471 
l0: 0.677398, l1: 0.676949, l2: 0.679518, l3: 0.694458, l4: 0.722706, l5: 0.822487, l6: 0.961626

[epoch:  78/100, batch:   698/  792, ite: 68521] train loss: 4.277466, tar: 0.433939 
l0: 0.890803, l1: 0.912623, l2: 0.914574, l3: 0.926672, l4: 0.965742, l5: 1.047710, l6: 1.187169

[epoch:  78/100, batch:   700/  792, ite: 68522] train loss: 4.284556, tar: 0.434814 
l0: 0.554750, l1: 0.555373, l2: 0.556031, l3: 0.558685, l4: 0.644175, l5: 0.810904, l6: 1.062808

[epoch:  78/100, batch:   702/  792, ite: 68523] train loss: 4.287603, tar: 0.43

[epoch:  78/100, batch:   782/  792, ite: 68563] train loss: 4.279320, tar: 0.434403 
l0: 0.535251, l1: 0.541159, l2: 0.539012, l3: 0.544588, l4: 0.566241, l5: 0.634397, l6: 0.746681

[epoch:  78/100, batch:   784/  792, ite: 68564] train loss: 4.280499, tar: 0.434582 
l0: 0.603736, l1: 0.600464, l2: 0.598691, l3: 0.602507, l4: 0.641282, l5: 0.709825, l6: 0.840754

[epoch:  78/100, batch:   786/  792, ite: 68565] train loss: 4.282543, tar: 0.434881 
l0: 0.379617, l1: 0.381831, l2: 0.383743, l3: 0.389860, l4: 0.436387, l5: 0.558083, l6: 0.671030

[epoch:  78/100, batch:   788/  792, ite: 68566] train loss: 4.281930, tar: 0.434784 
l0: 0.368024, l1: 0.368322, l2: 0.370444, l3: 0.373743, l4: 0.417829, l5: 0.531342, l6: 0.692720

[epoch:  78/100, batch:   790/  792, ite: 68567] train loss: 4.281285, tar: 0.434666 
l0: 0.237248, l1: 0.240412, l2: 0.243374, l3: 0.245332, l4: 0.276257, l5: 0.383076, l6: 0.505597

[epoch:  78/100, batch:   792/  792, ite: 68568] train loss: 4.278391, tar: 0.43

l0: 0.493053, l1: 0.498990, l2: 0.498173, l3: 0.505090, l4: 0.530294, l5: 0.660525, l6: 0.760798

[epoch:  79/100, batch:    80/  792, ite: 68608] train loss: 4.266814, tar: 0.432539 
l0: 0.178016, l1: 0.181589, l2: 0.185751, l3: 0.198005, l4: 0.236252, l5: 0.343069, l6: 0.469821

[epoch:  79/100, batch:    82/  792, ite: 68609] train loss: 4.263564, tar: 0.432121 
l0: 0.448836, l1: 0.454031, l2: 0.449718, l3: 0.450895, l4: 0.478308, l5: 0.567989, l6: 0.746063

[epoch:  79/100, batch:    84/  792, ite: 68610] train loss: 4.263732, tar: 0.432149 
l0: 0.424853, l1: 0.423856, l2: 0.426980, l3: 0.429168, l4: 0.448695, l5: 0.523891, l6: 0.665203

[epoch:  79/100, batch:    86/  792, ite: 68611] train loss: 4.263581, tar: 0.432137 
l0: 0.339947, l1: 0.344120, l2: 0.346491, l3: 0.355015, l4: 0.368312, l5: 0.451626, l6: 0.678280

[epoch:  79/100, batch:    88/  792, ite: 68612] train loss: 4.262528, tar: 0.431986 
l0: 0.243488, l1: 0.246298, l2: 0.249526, l3: 0.260298, l4: 0.297968, l5: 0.4515

[epoch:  79/100, batch:   168/  792, ite: 68652] train loss: 4.246406, tar: 0.429459 
l0: 0.545264, l1: 0.551845, l2: 0.551082, l3: 0.556605, l4: 0.594214, l5: 0.706765, l6: 0.873106

[epoch:  79/100, batch:   170/  792, ite: 68653] train loss: 4.248078, tar: 0.429637 
l0: 0.306205, l1: 0.308696, l2: 0.310093, l3: 0.312996, l4: 0.332233, l5: 0.464440, l6: 0.527573

[epoch:  79/100, batch:   172/  792, ite: 68654] train loss: 4.246777, tar: 0.429448 
l0: 0.197197, l1: 0.198474, l2: 0.199207, l3: 0.205285, l4: 0.225982, l5: 0.416215, l6: 0.466984

[epoch:  79/100, batch:   174/  792, ite: 68655] train loss: 4.243988, tar: 0.429093 
l0: 0.303356, l1: 0.306870, l2: 0.307263, l3: 0.305340, l4: 0.332085, l5: 0.424018, l6: 0.514205

[epoch:  79/100, batch:   176/  792, ite: 68656] train loss: 4.242090, tar: 0.428902 
l0: 0.601432, l1: 0.604831, l2: 0.605911, l3: 0.609887, l4: 0.614744, l5: 0.775967, l6: 0.801840

[epoch:  79/100, batch:   178/  792, ite: 68657] train loss: 4.243846, tar: 0.42

[epoch:  79/100, batch:   258/  792, ite: 68697] train loss: 4.255074, tar: 0.430870 
l0: 0.641662, l1: 0.646093, l2: 0.648441, l3: 0.645832, l4: 0.658279, l5: 0.767044, l6: 1.163283

[epoch:  79/100, batch:   260/  792, ite: 68698] train loss: 4.257886, tar: 0.431172 
l0: 0.310091, l1: 0.312432, l2: 0.310659, l3: 0.314797, l4: 0.342569, l5: 0.529337, l6: 0.814837

[epoch:  79/100, batch:   262/  792, ite: 68699] train loss: 4.257080, tar: 0.430998 
l0: 0.554934, l1: 0.560966, l2: 0.558619, l3: 0.556078, l4: 0.585668, l5: 0.667117, l6: 0.889094

[epoch:  79/100, batch:   264/  792, ite: 68700] train loss: 4.258883, tar: 0.431175 
l0: 0.347287, l1: 0.360133, l2: 0.357666, l3: 0.345580, l4: 0.396168, l5: 0.449204, l6: 0.576496

[epoch:  79/100, batch:   266/  792, ite: 68701] train loss: 4.257673, tar: 0.431056 
l0: 0.530104, l1: 0.535498, l2: 0.535723, l3: 0.539733, l4: 0.575397, l5: 0.648624, l6: 0.921706

[epoch:  79/100, batch:   268/  792, ite: 68702] train loss: 4.259234, tar: 0.43

[epoch:  79/100, batch:   348/  792, ite: 68742] train loss: 4.257916, tar: 0.431187 
l0: 0.337665, l1: 0.338242, l2: 0.336529, l3: 0.335979, l4: 0.354370, l5: 0.413775, l6: 0.634987

[epoch:  79/100, batch:   350/  792, ite: 68743] train loss: 4.256841, tar: 0.431061 
l0: 0.462061, l1: 0.466194, l2: 0.462464, l3: 0.460818, l4: 0.482300, l5: 0.576676, l6: 0.731198

[epoch:  79/100, batch:   352/  792, ite: 68744] train loss: 4.257010, tar: 0.431103 
l0: 0.432101, l1: 0.433397, l2: 0.434386, l3: 0.440191, l4: 0.482381, l5: 0.626130, l6: 0.827938

[epoch:  79/100, batch:   354/  792, ite: 68745] train loss: 4.257449, tar: 0.431104 
l0: 0.282954, l1: 0.284983, l2: 0.285857, l3: 0.297567, l4: 0.336695, l5: 0.444414, l6: 0.604195

[epoch:  79/100, batch:   356/  792, ite: 68746] train loss: 4.256012, tar: 0.430906 
l0: 0.395083, l1: 0.406215, l2: 0.409240, l3: 0.414897, l4: 0.450107, l5: 0.476226, l6: 0.676826

[epoch:  79/100, batch:   358/  792, ite: 68747] train loss: 4.255687, tar: 0.43

[epoch:  79/100, batch:   438/  792, ite: 68787] train loss: 4.244509, tar: 0.429558 
l0: 0.239498, l1: 0.239750, l2: 0.238520, l3: 0.241886, l4: 0.252299, l5: 0.338312, l6: 0.442472

[epoch:  79/100, batch:   440/  792, ite: 68788] train loss: 4.242263, tar: 0.429316 
l0: 0.709789, l1: 0.716361, l2: 0.711112, l3: 0.705077, l4: 0.708049, l5: 0.771749, l6: 0.885177

[epoch:  79/100, batch:   442/  792, ite: 68789] train loss: 4.244592, tar: 0.429672 
l0: 0.503493, l1: 0.510324, l2: 0.513150, l3: 0.519879, l4: 0.545530, l5: 0.582595, l6: 0.714983

[epoch:  79/100, batch:   444/  792, ite: 68790] train loss: 4.245174, tar: 0.429765 
l0: 0.280244, l1: 0.281891, l2: 0.281582, l3: 0.285405, l4: 0.305589, l5: 0.409136, l6: 0.486583

[epoch:  79/100, batch:   446/  792, ite: 68791] train loss: 4.243405, tar: 0.429576 
l0: 0.332291, l1: 0.344540, l2: 0.340506, l3: 0.348767, l4: 0.357881, l5: 0.492713, l6: 0.510317

[epoch:  79/100, batch:   448/  792, ite: 68792] train loss: 4.242364, tar: 0.42

[epoch:  79/100, batch:   528/  792, ite: 68832] train loss: 4.239347, tar: 0.429511 
l0: 0.376812, l1: 0.373497, l2: 0.371395, l3: 0.376217, l4: 0.400320, l5: 0.511838, l6: 0.651021

[epoch:  79/100, batch:   530/  792, ite: 68833] train loss: 4.238833, tar: 0.429448 
l0: 0.297321, l1: 0.299842, l2: 0.303255, l3: 0.306094, l4: 0.331364, l5: 0.409600, l6: 0.602641

[epoch:  79/100, batch:   532/  792, ite: 68834] train loss: 4.237650, tar: 0.429289 
l0: 0.924587, l1: 0.922958, l2: 0.924241, l3: 0.922338, l4: 0.993804, l5: 1.082114, l6: 1.153844

[epoch:  79/100, batch:   534/  792, ite: 68835] train loss: 4.242437, tar: 0.429883 
l0: 0.172527, l1: 0.173755, l2: 0.170909, l3: 0.179973, l4: 0.211807, l5: 0.288778, l6: 0.373849

[epoch:  79/100, batch:   536/  792, ite: 68836] train loss: 4.239767, tar: 0.429575 
l0: 0.280414, l1: 0.282087, l2: 0.285168, l3: 0.297344, l4: 0.328836, l5: 0.490100, l6: 0.561683

[epoch:  79/100, batch:   538/  792, ite: 68837] train loss: 4.238411, tar: 0.42

[epoch:  79/100, batch:   618/  792, ite: 68877] train loss: 4.229655, tar: 0.428168 
l0: 0.418159, l1: 0.423377, l2: 0.426648, l3: 0.435968, l4: 0.452677, l5: 0.515153, l6: 0.673731

[epoch:  79/100, batch:   620/  792, ite: 68878] train loss: 4.229447, tar: 0.428157 
l0: 0.577584, l1: 0.577825, l2: 0.578159, l3: 0.587225, l4: 0.619316, l5: 0.751348, l6: 0.808794

[epoch:  79/100, batch:   622/  792, ite: 68879] train loss: 4.230668, tar: 0.428327 
l0: 0.600141, l1: 0.596610, l2: 0.600250, l3: 0.602072, l4: 0.593847, l5: 0.591834, l6: 0.564052

[epoch:  79/100, batch:   624/  792, ite: 68880] train loss: 4.231328, tar: 0.428522 
l0: 0.157463, l1: 0.164017, l2: 0.163474, l3: 0.171102, l4: 0.194652, l5: 0.296315, l6: 0.403592

[epoch:  79/100, batch:   626/  792, ite: 68881] train loss: 4.228794, tar: 0.428215 
l0: 0.295826, l1: 0.296968, l2: 0.298978, l3: 0.300966, l4: 0.315494, l5: 0.391731, l6: 0.490514

[epoch:  79/100, batch:   628/  792, ite: 68882] train loss: 4.227354, tar: 0.42

[epoch:  79/100, batch:   708/  792, ite: 68922] train loss: 4.237943, tar: 0.429220 
l0: 0.638615, l1: 0.636111, l2: 0.636954, l3: 0.645084, l4: 0.689499, l5: 0.800010, l6: 1.009630

[epoch:  79/100, batch:   710/  792, ite: 68923] train loss: 4.239987, tar: 0.429446 
l0: 0.403526, l1: 0.407708, l2: 0.404712, l3: 0.411981, l4: 0.440372, l5: 0.488615, l6: 0.707792

[epoch:  79/100, batch:   712/  792, ite: 68924] train loss: 4.239717, tar: 0.429418 
l0: 0.874454, l1: 0.878773, l2: 0.878025, l3: 0.880723, l4: 0.878564, l5: 0.996104, l6: 0.992369

[epoch:  79/100, batch:   714/  792, ite: 68925] train loss: 4.243095, tar: 0.429899 
l0: 0.335562, l1: 0.341703, l2: 0.332987, l3: 0.333160, l4: 0.361170, l5: 0.429692, l6: 0.576605

[epoch:  79/100, batch:   716/  792, ite: 68926] train loss: 4.242248, tar: 0.429798 
l0: 0.200340, l1: 0.204336, l2: 0.202309, l3: 0.211424, l4: 0.242138, l5: 0.347031, l6: 0.424626

[epoch:  79/100, batch:   718/  792, ite: 68927] train loss: 4.240160, tar: 0.42

l0: 0.221223, l1: 0.220083, l2: 0.221768, l3: 0.224173, l4: 0.233225, l5: 0.368543, l6: 0.501797

[epoch:  80/100, batch:     6/  792, ite: 68967] train loss: 4.247033, tar: 0.430439 
l0: 0.318748, l1: 0.325203, l2: 0.325527, l3: 0.330471, l4: 0.338717, l5: 0.471934, l6: 0.662199

[epoch:  80/100, batch:     8/  792, ite: 68968] train loss: 4.246282, tar: 0.430323 
l0: 0.551238, l1: 0.556801, l2: 0.554758, l3: 0.548022, l4: 0.542800, l5: 0.541386, l6: 0.865284

[epoch:  80/100, batch:    10/  792, ite: 68969] train loss: 4.247113, tar: 0.430448 
l0: 0.279252, l1: 0.283386, l2: 0.283587, l3: 0.291520, l4: 0.327848, l5: 0.460852, l6: 0.563507

[epoch:  80/100, batch:    12/  792, ite: 68970] train loss: 4.245990, tar: 0.430292 
l0: 0.374349, l1: 0.374841, l2: 0.375833, l3: 0.376555, l4: 0.410805, l5: 0.468129, l6: 0.628834

[epoch:  80/100, batch:    14/  792, ite: 68971] train loss: 4.245429, tar: 0.430235 
l0: 0.361401, l1: 0.366526, l2: 0.364748, l3: 0.370490, l4: 0.406860, l5: 0.4783

[epoch:  80/100, batch:    94/  792, ite: 69011] train loss: 4.253535, tar: 0.431033 
l0: 0.360141, l1: 0.362371, l2: 0.361640, l3: 0.362594, l4: 0.406669, l5: 0.499008, l6: 0.645070

[epoch:  80/100, batch:    96/  792, ite: 69012] train loss: 4.253021, tar: 0.430963 
l0: 0.286682, l1: 0.290032, l2: 0.286843, l3: 0.288033, l4: 0.326210, l5: 0.427535, l6: 0.549731

[epoch:  80/100, batch:    98/  792, ite: 69013] train loss: 4.251744, tar: 0.430821 
l0: 0.814767, l1: 0.820355, l2: 0.821761, l3: 0.822671, l4: 0.862688, l5: 1.048836, l6: 1.295134

[epoch:  80/100, batch:   100/  792, ite: 69014] train loss: 4.255225, tar: 0.431199 
l0: 0.248920, l1: 0.254998, l2: 0.255939, l3: 0.263318, l4: 0.268353, l5: 0.384660, l6: 0.557260

[epoch:  80/100, batch:   102/  792, ite: 69015] train loss: 4.253767, tar: 0.431020 
l0: 0.187436, l1: 0.191352, l2: 0.192148, l3: 0.197290, l4: 0.237143, l5: 0.303818, l6: 0.407438

[epoch:  80/100, batch:   104/  792, ite: 69016] train loss: 4.251723, tar: 0.43

[epoch:  80/100, batch:   184/  792, ite: 69056] train loss: 4.243581, tar: 0.429979 
l0: 0.244170, l1: 0.241585, l2: 0.242882, l3: 0.250871, l4: 0.292597, l5: 0.410918, l6: 0.485865

[epoch:  80/100, batch:   186/  792, ite: 69057] train loss: 4.242147, tar: 0.429803 
l0: 0.488719, l1: 0.494135, l2: 0.496186, l3: 0.489794, l4: 0.520325, l5: 0.617213, l6: 0.839775

[epoch:  80/100, batch:   188/  792, ite: 69058] train loss: 4.242710, tar: 0.429859 
l0: 1.273890, l1: 1.277559, l2: 1.274660, l3: 1.284079, l4: 1.348637, l5: 1.424962, l6: 1.535039

[epoch:  80/100, batch:   190/  792, ite: 69059] train loss: 4.248984, tar: 0.430656 
l0: 0.383297, l1: 0.384268, l2: 0.377620, l3: 0.364190, l4: 0.381432, l5: 0.421099, l6: 0.510395

[epoch:  80/100, batch:   192/  792, ite: 69060] train loss: 4.248160, tar: 0.430611 
l0: 0.258926, l1: 0.257525, l2: 0.256075, l3: 0.260286, l4: 0.286464, l5: 0.400095, l6: 0.608195

[epoch:  80/100, batch:   194/  792, ite: 69061] train loss: 4.246974, tar: 0.43

[epoch:  80/100, batch:   274/  792, ite: 69101] train loss: 4.239705, tar: 0.429704 
l0: 0.726818, l1: 0.737909, l2: 0.737979, l3: 0.735075, l4: 0.743106, l5: 0.779872, l6: 0.862654

[epoch:  80/100, batch:   276/  792, ite: 69102] train loss: 4.241595, tar: 0.429974 
l0: 0.523112, l1: 0.525966, l2: 0.526333, l3: 0.525690, l4: 0.553914, l5: 0.629479, l6: 0.720641

[epoch:  80/100, batch:   278/  792, ite: 69103] train loss: 4.242092, tar: 0.430058 
l0: 0.274907, l1: 0.274708, l2: 0.272963, l3: 0.278722, l4: 0.318345, l5: 0.443315, l6: 0.704970

[epoch:  80/100, batch:   280/  792, ite: 69104] train loss: 4.241244, tar: 0.429918 
l0: 0.388247, l1: 0.392782, l2: 0.390291, l3: 0.400786, l4: 0.424146, l5: 0.470655, l6: 0.684194

[epoch:  80/100, batch:   282/  792, ite: 69105] train loss: 4.240922, tar: 0.429880 
l0: 0.365775, l1: 0.366513, l2: 0.368789, l3: 0.378829, l4: 0.406051, l5: 0.531005, l6: 0.650859

[epoch:  80/100, batch:   284/  792, ite: 69106] train loss: 4.240544, tar: 0.42

[epoch:  80/100, batch:   364/  792, ite: 69146] train loss: 4.242865, tar: 0.430143 
l0: 0.596946, l1: 0.606762, l2: 0.606493, l3: 0.616678, l4: 0.625809, l5: 0.625015, l6: 0.805461

[epoch:  80/100, batch:   366/  792, ite: 69147] train loss: 4.243828, tar: 0.430288 
l0: 0.291681, l1: 0.297503, l2: 0.297053, l3: 0.301860, l4: 0.316156, l5: 0.419161, l6: 0.602187

[epoch:  80/100, batch:   368/  792, ite: 69148] train loss: 4.242957, tar: 0.430168 
l0: 0.608575, l1: 0.616073, l2: 0.619631, l3: 0.623074, l4: 0.662915, l5: 0.771489, l6: 0.703882

[epoch:  80/100, batch:   370/  792, ite: 69149] train loss: 4.244039, tar: 0.430323 
l0: 0.841823, l1: 0.857144, l2: 0.895346, l3: 0.915996, l4: 0.925536, l5: 1.063932, l6: 0.904159

[epoch:  80/100, batch:   372/  792, ite: 69150] train loss: 4.246824, tar: 0.430681 
l0: 0.208509, l1: 0.208332, l2: 0.210690, l3: 0.213289, l4: 0.265674, l5: 0.362048, l6: 0.545333

[epoch:  80/100, batch:   374/  792, ite: 69151] train loss: 4.245388, tar: 0.43

[epoch:  80/100, batch:   454/  792, ite: 69191] train loss: 4.239614, tar: 0.430139 
l0: 0.376993, l1: 0.383776, l2: 0.381733, l3: 0.387353, l4: 0.426640, l5: 0.605638, l6: 0.778120

[epoch:  80/100, batch:   456/  792, ite: 69192] train loss: 4.239550, tar: 0.430094 
l0: 0.542618, l1: 0.544715, l2: 0.537078, l3: 0.530617, l4: 0.519904, l5: 0.610516, l6: 0.729497

[epoch:  80/100, batch:   458/  792, ite: 69193] train loss: 4.240035, tar: 0.430188 
l0: 0.458691, l1: 0.456961, l2: 0.460292, l3: 0.458096, l4: 0.482838, l5: 0.613985, l6: 0.780752

[epoch:  80/100, batch:   460/  792, ite: 69194] train loss: 4.240299, tar: 0.430212 
l0: 0.302800, l1: 0.308908, l2: 0.307639, l3: 0.309761, l4: 0.327785, l5: 0.433070, l6: 0.586791

[epoch:  80/100, batch:   462/  792, ite: 69195] train loss: 4.239403, tar: 0.430106 
l0: 0.367583, l1: 0.388307, l2: 0.381109, l3: 0.395316, l4: 0.443881, l5: 0.533634, l6: 0.844929

[epoch:  80/100, batch:   464/  792, ite: 69196] train loss: 4.239456, tar: 0.43

[epoch:  80/100, batch:   544/  792, ite: 69236] train loss: 4.241245, tar: 0.430052 
l0: 0.824808, l1: 0.836601, l2: 0.833808, l3: 0.828627, l4: 0.871491, l5: 0.940781, l6: 1.058893

[epoch:  80/100, batch:   546/  792, ite: 69237] train loss: 4.243750, tar: 0.430371 
l0: 0.201672, l1: 0.205247, l2: 0.205827, l3: 0.202339, l4: 0.239848, l5: 0.310441, l6: 0.488755

[epoch:  80/100, batch:   548/  792, ite: 69238] train loss: 4.242221, tar: 0.430186 
l0: 0.208006, l1: 0.217010, l2: 0.215255, l3: 0.214404, l4: 0.252003, l5: 0.343316, l6: 0.454608

[epoch:  80/100, batch:   550/  792, ite: 69239] train loss: 4.240766, tar: 0.430007 
l0: 0.385983, l1: 0.386641, l2: 0.386026, l3: 0.395677, l4: 0.424057, l5: 0.522938, l6: 0.661345

[epoch:  80/100, batch:   552/  792, ite: 69240] train loss: 4.240484, tar: 0.429971 
l0: 0.311103, l1: 0.311942, l2: 0.311315, l3: 0.315907, l4: 0.348959, l5: 0.406604, l6: 0.500584

[epoch:  80/100, batch:   554/  792, ite: 69241] train loss: 4.239509, tar: 0.42

[epoch:  80/100, batch:   634/  792, ite: 69281] train loss: 4.243864, tar: 0.430530 
l0: 0.862376, l1: 0.889048, l2: 0.896789, l3: 0.882676, l4: 0.927410, l5: 0.901208, l6: 0.932213

[epoch:  80/100, batch:   636/  792, ite: 69282] train loss: 4.246223, tar: 0.430867 
l0: 0.478899, l1: 0.476436, l2: 0.476091, l3: 0.473310, l4: 0.482087, l5: 0.596197, l6: 0.616084

[epoch:  80/100, batch:   638/  792, ite: 69283] train loss: 4.246236, tar: 0.430904 
l0: 0.190431, l1: 0.190648, l2: 0.188781, l3: 0.196994, l4: 0.237509, l5: 0.325106, l6: 0.384402

[epoch:  80/100, batch:   640/  792, ite: 69284] train loss: 4.244615, tar: 0.430717 
l0: 0.311514, l1: 0.316627, l2: 0.315148, l3: 0.321952, l4: 0.347960, l5: 0.495217, l6: 0.601933

[epoch:  80/100, batch:   642/  792, ite: 69285] train loss: 4.243964, tar: 0.430624 
l0: 0.772719, l1: 0.771796, l2: 0.772318, l3: 0.773185, l4: 0.793380, l5: 0.855648, l6: 1.036862

[epoch:  80/100, batch:   644/  792, ite: 69286] train loss: 4.245961, tar: 0.43

[epoch:  80/100, batch:   724/  792, ite: 69326] train loss: 4.241114, tar: 0.430286 
l0: 0.259260, l1: 0.261073, l2: 0.263874, l3: 0.261479, l4: 0.287879, l5: 0.347840, l6: 0.453547

[epoch:  80/100, batch:   726/  792, ite: 69327] train loss: 4.239888, tar: 0.430158 
l0: 0.283858, l1: 0.287478, l2: 0.289371, l3: 0.292930, l4: 0.315466, l5: 0.394434, l6: 0.552188

[epoch:  80/100, batch:   728/  792, ite: 69328] train loss: 4.238970, tar: 0.430047 
l0: 0.338842, l1: 0.342031, l2: 0.337477, l3: 0.331916, l4: 0.350279, l5: 0.445686, l6: 0.545699

[epoch:  80/100, batch:   730/  792, ite: 69329] train loss: 4.238233, tar: 0.429979 
l0: 0.293660, l1: 0.293302, l2: 0.294225, l3: 0.298083, l4: 0.320402, l5: 0.436384, l6: 0.657833

[epoch:  80/100, batch:   732/  792, ite: 69330] train loss: 4.237544, tar: 0.429876 
l0: 0.324293, l1: 0.328290, l2: 0.328304, l3: 0.326455, l4: 0.348882, l5: 0.399406, l6: 0.513300

[epoch:  80/100, batch:   734/  792, ite: 69331] train loss: 4.236707, tar: 0.42

l0: 0.441245, l1: 0.445860, l2: 0.445607, l3: 0.440094, l4: 0.466778, l5: 0.620659, l6: 0.788321

[epoch:  81/100, batch:    22/  792, ite: 69371] train loss: 4.246729, tar: 0.431005 
l0: 0.499055, l1: 0.481959, l2: 0.484797, l3: 0.483852, l4: 0.523235, l5: 0.558056, l6: 0.627911

[epoch:  81/100, batch:    24/  792, ite: 69372] train loss: 4.246882, tar: 0.431054 
l0: 0.239941, l1: 0.242926, l2: 0.244593, l3: 0.250071, l4: 0.290232, l5: 0.380488, l6: 0.479611

[epoch:  81/100, batch:    26/  792, ite: 69373] train loss: 4.245712, tar: 0.430915 
l0: 0.425303, l1: 0.430001, l2: 0.431236, l3: 0.428437, l4: 0.451872, l5: 0.569299, l6: 0.745367

[epoch:  81/100, batch:    28/  792, ite: 69374] train loss: 4.245731, tar: 0.430911 
l0: 0.345363, l1: 0.343403, l2: 0.343590, l3: 0.344417, l4: 0.375968, l5: 0.418663, l6: 0.660623

[epoch:  81/100, batch:    30/  792, ite: 69375] train loss: 4.245210, tar: 0.430849 
l0: 0.398918, l1: 0.389317, l2: 0.388730, l3: 0.390181, l4: 0.423604, l5: 0.6055

[epoch:  81/100, batch:   110/  792, ite: 69415] train loss: 4.238993, tar: 0.429880 
l0: 0.293178, l1: 0.290571, l2: 0.290194, l3: 0.298383, l4: 0.327186, l5: 0.385818, l6: 0.614269

[epoch:  81/100, batch:   112/  792, ite: 69416] train loss: 4.238274, tar: 0.429783 
l0: 0.481475, l1: 0.475707, l2: 0.479945, l3: 0.494788, l4: 0.545905, l5: 0.639983, l6: 0.873057

[epoch:  81/100, batch:   114/  792, ite: 69417] train loss: 4.238707, tar: 0.429820 
l0: 0.551845, l1: 0.556137, l2: 0.555109, l3: 0.557179, l4: 0.600624, l5: 0.706024, l6: 0.794808

[epoch:  81/100, batch:   116/  792, ite: 69418] train loss: 4.239350, tar: 0.429906 
l0: 0.463661, l1: 0.461388, l2: 0.462716, l3: 0.469349, l4: 0.483305, l5: 0.518231, l6: 0.619205

[epoch:  81/100, batch:   118/  792, ite: 69419] train loss: 4.239280, tar: 0.429929 
l0: 0.314824, l1: 0.318018, l2: 0.313731, l3: 0.310373, l4: 0.334244, l5: 0.347878, l6: 0.496982

[epoch:  81/100, batch:   120/  792, ite: 69420] train loss: 4.238400, tar: 0.42

[epoch:  81/100, batch:   200/  792, ite: 69460] train loss: 4.240671, tar: 0.430280 
l0: 0.394870, l1: 0.396240, l2: 0.395825, l3: 0.405036, l4: 0.435129, l5: 0.545048, l6: 0.633958

[epoch:  81/100, batch:   202/  792, ite: 69461] train loss: 4.240446, tar: 0.430256 
l0: 0.441014, l1: 0.437681, l2: 0.436169, l3: 0.437935, l4: 0.450468, l5: 0.579600, l6: 0.765397

[epoch:  81/100, batch:   204/  792, ite: 69462] train loss: 4.240642, tar: 0.430263 
l0: 0.372917, l1: 0.376278, l2: 0.375831, l3: 0.384859, l4: 0.414949, l5: 0.516689, l6: 0.721206

[epoch:  81/100, batch:   206/  792, ite: 69463] train loss: 4.240382, tar: 0.430224 
l0: 0.459753, l1: 0.462851, l2: 0.465419, l3: 0.468905, l4: 0.501126, l5: 0.578817, l6: 0.778988

[epoch:  81/100, batch:   208/  792, ite: 69464] train loss: 4.240678, tar: 0.430244 
l0: 0.287127, l1: 0.289529, l2: 0.287045, l3: 0.296980, l4: 0.318212, l5: 0.356486, l6: 0.564538

[epoch:  81/100, batch:   210/  792, ite: 69465] train loss: 4.239824, tar: 0.43

[epoch:  81/100, batch:   290/  792, ite: 69505] train loss: 4.235961, tar: 0.429693 
l0: 0.521182, l1: 0.524347, l2: 0.522829, l3: 0.523226, l4: 0.528707, l5: 0.595628, l6: 0.622621

[epoch:  81/100, batch:   292/  792, ite: 69506] train loss: 4.236104, tar: 0.429754 
l0: 0.513774, l1: 0.516834, l2: 0.515387, l3: 0.513956, l4: 0.549816, l5: 0.602044, l6: 0.723220

[epoch:  81/100, batch:   294/  792, ite: 69507] train loss: 4.236402, tar: 0.429809 
l0: 0.246361, l1: 0.255998, l2: 0.256879, l3: 0.261843, l4: 0.297545, l5: 0.368582, l6: 0.529090

[epoch:  81/100, batch:   296/  792, ite: 69508] train loss: 4.235474, tar: 0.429688 
l0: 0.418686, l1: 0.423980, l2: 0.424024, l3: 0.423409, l4: 0.433351, l5: 0.539304, l6: 0.773428

[epoch:  81/100, batch:   298/  792, ite: 69509] train loss: 4.235574, tar: 0.429680 
l0: 0.854633, l1: 0.864150, l2: 0.864824, l3: 0.876037, l4: 0.908412, l5: 0.917451, l6: 1.005260

[epoch:  81/100, batch:   300/  792, ite: 69510] train loss: 4.237625, tar: 0.42

[epoch:  81/100, batch:   380/  792, ite: 69550] train loss: 4.241501, tar: 0.430422 
l0: 0.525683, l1: 0.539787, l2: 0.541941, l3: 0.548501, l4: 0.583205, l5: 0.675595, l6: 0.749651

[epoch:  81/100, batch:   382/  792, ite: 69551] train loss: 4.241988, tar: 0.430484 
l0: 0.599555, l1: 0.605521, l2: 0.604140, l3: 0.606081, l4: 0.605644, l5: 0.712977, l6: 0.783073

[epoch:  81/100, batch:   384/  792, ite: 69552] train loss: 4.242660, tar: 0.430593 
l0: 0.445859, l1: 0.448186, l2: 0.446186, l3: 0.446869, l4: 0.471768, l5: 0.537311, l6: 0.740488

[epoch:  81/100, batch:   386/  792, ite: 69553] train loss: 4.242642, tar: 0.430603 
l0: 0.493292, l1: 0.493110, l2: 0.489760, l3: 0.488855, l4: 0.502548, l5: 0.552976, l6: 0.624706

[epoch:  81/100, batch:   388/  792, ite: 69554] train loss: 4.242809, tar: 0.430643 
l0: 0.290872, l1: 0.291582, l2: 0.291902, l3: 0.294841, l4: 0.326978, l5: 0.497827, l6: 0.656976

[epoch:  81/100, batch:   390/  792, ite: 69555] train loss: 4.242267, tar: 0.43

[epoch:  81/100, batch:   470/  792, ite: 69595] train loss: 4.246905, tar: 0.431163 
l0: 0.330741, l1: 0.334418, l2: 0.334886, l3: 0.331660, l4: 0.371160, l5: 0.555829, l6: 0.602501

[epoch:  81/100, batch:   472/  792, ite: 69596] train loss: 4.246461, tar: 0.431100 
l0: 0.339237, l1: 0.344346, l2: 0.345106, l3: 0.349702, l4: 0.381073, l5: 0.466638, l6: 0.609890

[epoch:  81/100, batch:   474/  792, ite: 69597] train loss: 4.246001, tar: 0.431043 
l0: 0.498302, l1: 0.506545, l2: 0.505708, l3: 0.496651, l4: 0.531963, l5: 0.630254, l6: 0.630191

[epoch:  81/100, batch:   476/  792, ite: 69598] train loss: 4.246150, tar: 0.431085 
l0: 0.346224, l1: 0.350055, l2: 0.350495, l3: 0.348381, l4: 0.381410, l5: 0.543451, l6: 0.683856

[epoch:  81/100, batch:   478/  792, ite: 69599] train loss: 4.245843, tar: 0.431032 
l0: 0.361334, l1: 0.359794, l2: 0.359495, l3: 0.363650, l4: 0.378454, l5: 0.428575, l6: 0.526273

[epoch:  81/100, batch:   480/  792, ite: 69600] train loss: 4.245246, tar: 0.43

[epoch:  81/100, batch:   560/  792, ite: 69640] train loss: 4.243711, tar: 0.430644 
l0: 0.521258, l1: 0.526440, l2: 0.526033, l3: 0.530686, l4: 0.553998, l5: 0.722140, l6: 0.961845

[epoch:  81/100, batch:   562/  792, ite: 69641] train loss: 4.244350, tar: 0.430699 
l0: 0.324431, l1: 0.324777, l2: 0.325423, l3: 0.325127, l4: 0.363888, l5: 0.534976, l6: 0.721981

[epoch:  81/100, batch:   564/  792, ite: 69642] train loss: 4.243997, tar: 0.430634 
l0: 0.472476, l1: 0.473724, l2: 0.471680, l3: 0.471652, l4: 0.496306, l5: 0.594218, l6: 0.856236

[epoch:  81/100, batch:   566/  792, ite: 69643] train loss: 4.244266, tar: 0.430660 
l0: 0.390361, l1: 0.393154, l2: 0.393000, l3: 0.395883, l4: 0.432725, l5: 0.495279, l6: 0.670409

[epoch:  81/100, batch:   568/  792, ite: 69644] train loss: 4.244085, tar: 0.430635 
l0: 0.459058, l1: 0.466083, l2: 0.467883, l3: 0.468827, l4: 0.485441, l5: 0.655113, l6: 0.822766

[epoch:  81/100, batch:   570/  792, ite: 69645] train loss: 4.244332, tar: 0.43

[epoch:  81/100, batch:   650/  792, ite: 69685] train loss: 4.236207, tar: 0.429752 
l0: 0.811556, l1: 0.814604, l2: 0.811626, l3: 0.809930, l4: 0.813030, l5: 0.840293, l6: 0.852554

[epoch:  81/100, batch:   652/  792, ite: 69686] train loss: 4.237655, tar: 0.429978 
l0: 0.353498, l1: 0.354084, l2: 0.351363, l3: 0.347447, l4: 0.393628, l5: 0.569803, l6: 0.630879

[epoch:  81/100, batch:   654/  792, ite: 69687] train loss: 4.237307, tar: 0.429933 
l0: 0.315393, l1: 0.318139, l2: 0.319286, l3: 0.318891, l4: 0.315342, l5: 0.401500, l6: 0.558357

[epoch:  81/100, batch:   656/  792, ite: 69688] train loss: 4.236660, tar: 0.429865 
l0: 0.450570, l1: 0.448453, l2: 0.449439, l3: 0.451451, l4: 0.452294, l5: 0.562665, l6: 0.644945

[epoch:  81/100, batch:   658/  792, ite: 69689] train loss: 4.236667, tar: 0.429877 
l0: 0.401809, l1: 0.404995, l2: 0.400562, l3: 0.407197, l4: 0.455944, l5: 0.559386, l6: 0.808528

[epoch:  81/100, batch:   660/  792, ite: 69690] train loss: 4.236766, tar: 0.42

[epoch:  81/100, batch:   740/  792, ite: 69730] train loss: 4.237404, tar: 0.429732 
l0: 0.191370, l1: 0.196259, l2: 0.194745, l3: 0.196639, l4: 0.240402, l5: 0.437160, l6: 0.543214

[epoch:  81/100, batch:   742/  792, ite: 69731] train loss: 4.236450, tar: 0.429594 
l0: 0.377488, l1: 0.377931, l2: 0.376622, l3: 0.383451, l4: 0.411757, l5: 0.609048, l6: 0.909190

[epoch:  81/100, batch:   744/  792, ite: 69732] train loss: 4.236493, tar: 0.429564 
l0: 0.275842, l1: 0.283537, l2: 0.284781, l3: 0.294094, l4: 0.313068, l5: 0.506749, l6: 0.602509

[epoch:  81/100, batch:   746/  792, ite: 69733] train loss: 4.235926, tar: 0.429475 
l0: 0.300716, l1: 0.306757, l2: 0.307425, l3: 0.313891, l4: 0.349968, l5: 0.436333, l6: 0.540029

[epoch:  81/100, batch:   748/  792, ite: 69734] train loss: 4.235267, tar: 0.429401 
l0: 0.797965, l1: 0.805367, l2: 0.797263, l3: 0.797581, l4: 0.819037, l5: 0.897852, l6: 0.887521

[epoch:  81/100, batch:   750/  792, ite: 69735] train loss: 4.236734, tar: 0.42

l0: 0.549358, l1: 0.551861, l2: 0.547678, l3: 0.536434, l4: 0.571710, l5: 0.725623, l6: 0.908272

[epoch:  82/100, batch:    38/  792, ite: 69775] train loss: 4.227539, tar: 0.428488 
l0: 0.332704, l1: 0.332209, l2: 0.334740, l3: 0.345536, l4: 0.362099, l5: 0.490257, l6: 0.604381

[epoch:  82/100, batch:    40/  792, ite: 69776] train loss: 4.227103, tar: 0.428434 
l0: 0.389604, l1: 0.386035, l2: 0.385378, l3: 0.389231, l4: 0.433384, l5: 0.499348, l6: 0.628111

[epoch:  82/100, batch:    42/  792, ite: 69777] train loss: 4.226842, tar: 0.428412 
l0: 0.397617, l1: 0.395145, l2: 0.401255, l3: 0.410306, l4: 0.445177, l5: 0.534760, l6: 0.743096

[epoch:  82/100, batch:    44/  792, ite: 69778] train loss: 4.226701, tar: 0.428395 
l0: 0.463102, l1: 0.465711, l2: 0.462882, l3: 0.465657, l4: 0.506971, l5: 0.603929, l6: 0.838062

[epoch:  82/100, batch:    46/  792, ite: 69779] train loss: 4.226944, tar: 0.428415 
l0: 0.665020, l1: 0.669781, l2: 0.669094, l3: 0.672884, l4: 0.681492, l5: 0.7847

[epoch:  82/100, batch:   126/  792, ite: 69819] train loss: 4.234978, tar: 0.429364 
l0: 0.535918, l1: 0.538277, l2: 0.540302, l3: 0.544176, l4: 0.594424, l5: 0.788101, l6: 1.033696

[epoch:  82/100, batch:   128/  792, ite: 69820] train loss: 4.235747, tar: 0.429422 
l0: 0.500627, l1: 0.503823, l2: 0.504716, l3: 0.503533, l4: 0.532063, l5: 0.627523, l6: 0.906191

[epoch:  82/100, batch:   130/  792, ite: 69821] train loss: 4.236205, tar: 0.429461 
l0: 0.321334, l1: 0.323322, l2: 0.325096, l3: 0.332918, l4: 0.332456, l5: 0.422143, l6: 0.576646

[epoch:  82/100, batch:   132/  792, ite: 69822] train loss: 4.235641, tar: 0.429402 
l0: 0.491153, l1: 0.494260, l2: 0.496225, l3: 0.497770, l4: 0.518101, l5: 0.523679, l6: 0.654194

[epoch:  82/100, batch:   134/  792, ite: 69823] train loss: 4.235726, tar: 0.429436 
l0: 0.240114, l1: 0.245960, l2: 0.241945, l3: 0.246958, l4: 0.274820, l5: 0.395619, l6: 0.566881

[epoch:  82/100, batch:   136/  792, ite: 69824] train loss: 4.234962, tar: 0.42

[epoch:  82/100, batch:   216/  792, ite: 69864] train loss: 4.231854, tar: 0.428969 
l0: 0.352154, l1: 0.353596, l2: 0.352593, l3: 0.360794, l4: 0.433598, l5: 0.552356, l6: 0.637437

[epoch:  82/100, batch:   218/  792, ite: 69865] train loss: 4.231550, tar: 0.428928 
l0: 0.436951, l1: 0.439322, l2: 0.439638, l3: 0.440980, l4: 0.470653, l5: 0.551028, l6: 0.727675

[epoch:  82/100, batch:   220/  792, ite: 69866] train loss: 4.231582, tar: 0.428932 
l0: 0.401177, l1: 0.403979, l2: 0.402511, l3: 0.399723, l4: 0.429526, l5: 0.449540, l6: 0.722071

[epoch:  82/100, batch:   222/  792, ite: 69867] train loss: 4.231404, tar: 0.428917 
l0: 0.400588, l1: 0.403314, l2: 0.406244, l3: 0.407501, l4: 0.432796, l5: 0.437969, l6: 0.493878

[epoch:  82/100, batch:   224/  792, ite: 69868] train loss: 4.231045, tar: 0.428902 
l0: 0.273226, l1: 0.270366, l2: 0.272425, l3: 0.274312, l4: 0.279970, l5: 0.380615, l6: 0.503301

[epoch:  82/100, batch:   226/  792, ite: 69869] train loss: 4.230263, tar: 0.42

[epoch:  82/100, batch:   306/  792, ite: 69909] train loss: 4.226900, tar: 0.428285 
l0: 0.859249, l1: 0.879155, l2: 0.878878, l3: 0.879662, l4: 0.912882, l5: 1.018754, l6: 1.265393

[epoch:  82/100, batch:   308/  792, ite: 69910] train loss: 4.228964, tar: 0.428511 
l0: 0.827672, l1: 0.827340, l2: 0.827922, l3: 0.835306, l4: 0.846298, l5: 0.940623, l6: 1.014898

[epoch:  82/100, batch:   310/  792, ite: 69911] train loss: 4.230599, tar: 0.428719 
l0: 0.832448, l1: 0.836546, l2: 0.831610, l3: 0.830664, l4: 0.849396, l5: 0.955734, l6: 1.086991

[epoch:  82/100, batch:   312/  792, ite: 69912] train loss: 4.232282, tar: 0.428931 
l0: 0.254069, l1: 0.261192, l2: 0.261590, l3: 0.264664, l4: 0.274873, l5: 0.377015, l6: 0.571824

[epoch:  82/100, batch:   314/  792, ite: 69913] train loss: 4.231596, tar: 0.428839 
l0: 0.811032, l1: 0.812846, l2: 0.799967, l3: 0.791666, l4: 0.799256, l5: 0.845458, l6: 0.940980

[epoch:  82/100, batch:   316/  792, ite: 69914] train loss: 4.232958, tar: 0.42

[epoch:  82/100, batch:   396/  792, ite: 69954] train loss: 4.232656, tar: 0.428988 
l0: 0.248738, l1: 0.247818, l2: 0.246374, l3: 0.245388, l4: 0.258828, l5: 0.345237, l6: 0.519388

[epoch:  82/100, batch:   398/  792, ite: 69955] train loss: 4.231902, tar: 0.428895 
l0: 0.263907, l1: 0.263657, l2: 0.265105, l3: 0.268751, l4: 0.331099, l5: 0.528773, l6: 0.715619

[epoch:  82/100, batch:   400/  792, ite: 69956] train loss: 4.231501, tar: 0.428811 
l0: 0.183037, l1: 0.186263, l2: 0.188728, l3: 0.198356, l4: 0.270618, l5: 0.409088, l6: 0.659689

[epoch:  82/100, batch:   402/  792, ite: 69957] train loss: 4.230713, tar: 0.428685 
l0: 0.876885, l1: 0.871052, l2: 0.870511, l3: 0.873365, l4: 0.909383, l5: 1.032582, l6: 1.070234

[epoch:  82/100, batch:   404/  792, ite: 69958] train loss: 4.232404, tar: 0.428914 
l0: 0.572973, l1: 0.584378, l2: 0.585205, l3: 0.581465, l4: 0.556986, l5: 0.578470, l6: 0.736621

[epoch:  82/100, batch:   406/  792, ite: 69959] train loss: 4.232829, tar: 0.42

[epoch:  82/100, batch:   486/  792, ite: 69999] train loss: 4.241018, tar: 0.429963 
l0: 0.433217, l1: 0.439400, l2: 0.439473, l3: 0.448906, l4: 0.477183, l5: 0.554739, l6: 0.582412

[epoch:  82/100, batch:   488/  792, ite: 70000] train loss: 4.240915, tar: 0.429964 
l0: 0.197189, l1: 0.198395, l2: 0.197168, l3: 0.209099, l4: 0.232096, l5: 0.364644, l6: 0.513050

[epoch:  82/100, batch:   490/  792, ite: 70001] train loss: 2.428705, tar: 0.197189 
l0: 0.217574, l1: 0.221207, l2: 0.221893, l3: 0.228844, l4: 0.250290, l5: 0.403029, l6: 0.518296

[epoch:  82/100, batch:   492/  792, ite: 70002] train loss: 2.520805, tar: 0.207382 
l0: 0.304261, l1: 0.313350, l2: 0.310989, l3: 0.317457, l4: 0.348409, l5: 0.454347, l6: 0.566199

[epoch:  82/100, batch:   494/  792, ite: 70003] train loss: 2.755939, tar: 0.239675 
l0: 0.289530, l1: 0.289785, l2: 0.289704, l3: 0.298818, l4: 0.343406, l5: 0.456011, l6: 0.596631

[epoch:  82/100, batch:   496/  792, ite: 70004] train loss: 2.920862, tar: 0.25

[epoch:  82/100, batch:   576/  792, ite: 70044] train loss: 4.178963, tar: 0.435139 
l0: 0.161638, l1: 0.166876, l2: 0.165935, l3: 0.176885, l4: 0.190601, l5: 0.276940, l6: 0.409389

[epoch:  82/100, batch:   578/  792, ite: 70045] train loss: 4.129980, tar: 0.429061 
l0: 0.576248, l1: 0.583766, l2: 0.587266, l3: 0.604945, l4: 0.599417, l5: 0.620931, l6: 0.652183

[epoch:  82/100, batch:   580/  792, ite: 70046] train loss: 4.148106, tar: 0.432261 
l0: 0.548569, l1: 0.549321, l2: 0.552133, l3: 0.555041, l4: 0.568625, l5: 0.573057, l6: 0.678664

[epoch:  82/100, batch:   582/  792, ite: 70047] train loss: 4.162230, tar: 0.434736 
l0: 0.261030, l1: 0.259324, l2: 0.256725, l3: 0.264788, l4: 0.305174, l5: 0.403893, l6: 0.573081

[epoch:  82/100, batch:   584/  792, ite: 70048] train loss: 4.134966, tar: 0.431117 
l0: 0.424781, l1: 0.424190, l2: 0.420026, l3: 0.426060, l4: 0.436554, l5: 0.526627, l6: 0.578825

[epoch:  82/100, batch:   586/  792, ite: 70049] train loss: 4.130805, tar: 0.43

[epoch:  82/100, batch:   666/  792, ite: 70089] train loss: 4.147568, tar: 0.424616 
l0: 0.344110, l1: 0.337966, l2: 0.341868, l3: 0.343525, l4: 0.352143, l5: 0.471043, l6: 0.604256

[epoch:  82/100, batch:   668/  792, ite: 70090] train loss: 4.139487, tar: 0.423721 
l0: 0.243526, l1: 0.246668, l2: 0.248276, l3: 0.252861, l4: 0.282810, l5: 0.511107, l6: 0.602320

[epoch:  82/100, batch:   670/  792, ite: 70091] train loss: 4.126941, tar: 0.421741 
l0: 0.502322, l1: 0.514327, l2: 0.511429, l3: 0.534773, l4: 0.593565, l5: 0.744367, l6: 0.838482

[epoch:  82/100, batch:   672/  792, ite: 70092] train loss: 4.138319, tar: 0.422617 
l0: 0.191052, l1: 0.190430, l2: 0.193190, l3: 0.207925, l4: 0.231281, l5: 0.366307, l6: 0.446807

[epoch:  82/100, batch:   674/  792, ite: 70093] train loss: 4.118368, tar: 0.420127 
l0: 0.483123, l1: 0.487231, l2: 0.487561, l3: 0.489421, l4: 0.494458, l5: 0.564267, l6: 0.738871

[epoch:  82/100, batch:   676/  792, ite: 70094] train loss: 4.122533, tar: 0.42

[epoch:  82/100, batch:   756/  792, ite: 70134] train loss: 4.007049, tar: 0.402279 
l0: 0.377762, l1: 0.375364, l2: 0.375915, l3: 0.376119, l4: 0.386346, l5: 0.423207, l6: 0.514644

[epoch:  82/100, batch:   758/  792, ite: 70135] train loss: 4.002662, tar: 0.402097 
l0: 0.569306, l1: 0.582255, l2: 0.580941, l3: 0.587932, l4: 0.607032, l5: 0.737552, l6: 0.892904

[epoch:  82/100, batch:   760/  792, ite: 70136] train loss: 4.013601, tar: 0.403327 
l0: 0.346141, l1: 0.351603, l2: 0.350755, l3: 0.342103, l4: 0.374130, l5: 0.448623, l6: 0.500346

[epoch:  82/100, batch:   762/  792, ite: 70137] train loss: 4.008180, tar: 0.402909 
l0: 1.480335, l1: 1.487780, l2: 1.470246, l3: 1.503517, l4: 1.538327, l5: 1.580568, l6: 1.741224

[epoch:  82/100, batch:   764/  792, ite: 70138] train loss: 4.070310, tar: 0.410717 
l0: 0.178608, l1: 0.183824, l2: 0.183499, l3: 0.184946, l4: 0.203744, l5: 0.309775, l6: 0.358875

[epoch:  82/100, batch:   766/  792, ite: 70139] train loss: 4.055634, tar: 0.40

l0: 0.535006, l1: 0.534088, l2: 0.532398, l3: 0.540125, l4: 0.586389, l5: 0.763931, l6: 1.002801

[epoch:  83/100, batch:    54/  792, ite: 70179] train loss: 4.118682, tar: 0.416805 
l0: 0.344019, l1: 0.339294, l2: 0.339468, l3: 0.346772, l4: 0.350675, l5: 0.426823, l6: 0.593874

[epoch:  83/100, batch:    56/  792, ite: 70180] train loss: 4.114294, tar: 0.416401 
l0: 0.365591, l1: 0.371042, l2: 0.370121, l3: 0.381048, l4: 0.444610, l5: 0.530579, l6: 0.737455

[epoch:  83/100, batch:    58/  792, ite: 70181] train loss: 4.114382, tar: 0.416120 
l0: 0.253674, l1: 0.250362, l2: 0.255136, l3: 0.264186, l4: 0.308024, l5: 0.448619, l6: 0.667815

[epoch:  83/100, batch:    60/  792, ite: 70182] train loss: 4.109377, tar: 0.415228 
l0: 0.437821, l1: 0.446813, l2: 0.446858, l3: 0.462110, l4: 0.498999, l5: 0.586133, l6: 0.651309

[epoch:  83/100, batch:    62/  792, ite: 70183] train loss: 4.109910, tar: 0.415351 
l0: 0.333387, l1: 0.337295, l2: 0.337427, l3: 0.341201, l4: 0.372033, l5: 0.4486

[epoch:  83/100, batch:   142/  792, ite: 70223] train loss: 4.093524, tar: 0.413510 
l0: 0.824657, l1: 0.834231, l2: 0.830423, l3: 0.832918, l4: 0.859579, l5: 0.915166, l6: 0.999889

[epoch:  83/100, batch:   144/  792, ite: 70224] train loss: 4.107282, tar: 0.415345 
l0: 0.194502, l1: 0.198330, l2: 0.195645, l3: 0.193476, l4: 0.253296, l5: 0.412479, l6: 0.662440

[epoch:  83/100, batch:   146/  792, ite: 70225] train loss: 4.101750, tar: 0.414364 
l0: 0.670599, l1: 0.668305, l2: 0.668912, l3: 0.670386, l4: 0.709891, l5: 0.742274, l6: 0.892775

[epoch:  83/100, batch:   148/  792, ite: 70226] train loss: 4.109920, tar: 0.415498 
l0: 0.304655, l1: 0.305112, l2: 0.307860, l3: 0.310821, l4: 0.347626, l5: 0.455830, l6: 0.605181

[epoch:  83/100, batch:   150/  792, ite: 70227] train loss: 4.106430, tar: 0.415009 
l0: 0.389787, l1: 0.396970, l2: 0.400233, l3: 0.410713, l4: 0.454009, l5: 0.673491, l6: 0.825487

[epoch:  83/100, batch:   152/  792, ite: 70228] train loss: 4.107867, tar: 0.41

[epoch:  83/100, batch:   232/  792, ite: 70268] train loss: 4.124544, tar: 0.416596 
l0: 0.543344, l1: 0.544739, l2: 0.547200, l3: 0.552638, l4: 0.586775, l5: 0.612180, l6: 0.793398

[epoch:  83/100, batch:   234/  792, ite: 70269] train loss: 4.127712, tar: 0.417067 
l0: 0.392740, l1: 0.395855, l2: 0.394807, l3: 0.402701, l4: 0.405402, l5: 0.492200, l6: 0.751492

[epoch:  83/100, batch:   236/  792, ite: 70270] train loss: 4.127274, tar: 0.416977 
l0: 0.519348, l1: 0.521983, l2: 0.522557, l3: 0.527858, l4: 0.551183, l5: 0.724288, l6: 0.883394

[epoch:  83/100, batch:   238/  792, ite: 70271] train loss: 4.131297, tar: 0.417355 
l0: 0.401872, l1: 0.405226, l2: 0.407615, l3: 0.423649, l4: 0.452606, l5: 0.559829, l6: 0.639219

[epoch:  83/100, batch:   240/  792, ite: 70272] train loss: 4.130603, tar: 0.417298 
l0: 0.542180, l1: 0.559205, l2: 0.558582, l3: 0.563299, l4: 0.567555, l5: 0.617736, l6: 0.665161

[epoch:  83/100, batch:   242/  792, ite: 70273] train loss: 4.133197, tar: 0.41

[epoch:  83/100, batch:   322/  792, ite: 70313] train loss: 4.138738, tar: 0.418200 
l0: 0.643266, l1: 0.649426, l2: 0.647716, l3: 0.633634, l4: 0.633729, l5: 0.706775, l6: 0.844448

[epoch:  83/100, batch:   324/  792, ite: 70314] train loss: 4.143782, tar: 0.418916 
l0: 0.279484, l1: 0.294493, l2: 0.299267, l3: 0.305379, l4: 0.354315, l5: 0.443435, l6: 0.654873

[epoch:  83/100, batch:   326/  792, ite: 70315] train loss: 4.141285, tar: 0.418474 
l0: 0.314175, l1: 0.314073, l2: 0.317019, l3: 0.328038, l4: 0.356245, l5: 0.497217, l6: 0.582103

[epoch:  83/100, batch:   328/  792, ite: 70316] train loss: 4.138777, tar: 0.418144 
l0: 0.511136, l1: 0.520633, l2: 0.521560, l3: 0.520107, l4: 0.529944, l5: 0.575518, l6: 0.650693

[epoch:  83/100, batch:   330/  792, ite: 70317] train loss: 4.140021, tar: 0.418437 
l0: 0.755155, l1: 0.756461, l2: 0.762475, l3: 0.756222, l4: 0.763763, l5: 0.812049, l6: 0.954450

[epoch:  83/100, batch:   332/  792, ite: 70318] train loss: 4.148090, tar: 0.41

[epoch:  83/100, batch:   412/  792, ite: 70358] train loss: 4.109449, tar: 0.414495 
l0: 0.681393, l1: 0.675330, l2: 0.685193, l3: 0.709125, l4: 0.759261, l5: 0.832546, l6: 1.247115

[epoch:  83/100, batch:   414/  792, ite: 70359] train loss: 4.117292, tar: 0.415238 
l0: 0.333863, l1: 0.338008, l2: 0.340816, l3: 0.331716, l4: 0.349418, l5: 0.455210, l6: 0.633740

[epoch:  83/100, batch:   416/  792, ite: 70360] train loss: 4.115536, tar: 0.415012 
l0: 0.625593, l1: 0.632823, l2: 0.631651, l3: 0.631683, l4: 0.661118, l5: 0.732697, l6: 1.046134

[epoch:  83/100, batch:   418/  792, ite: 70361] train loss: 4.120723, tar: 0.415595 
l0: 0.356384, l1: 0.359398, l2: 0.358598, l3: 0.351610, l4: 0.381177, l5: 0.477565, l6: 0.654785

[epoch:  83/100, batch:   420/  792, ite: 70362] train loss: 4.119603, tar: 0.415432 
l0: 0.348571, l1: 0.352094, l2: 0.351852, l3: 0.359599, l4: 0.383234, l5: 0.463814, l6: 0.625288

[epoch:  83/100, batch:   422/  792, ite: 70363] train loss: 4.118092, tar: 0.41

KeyboardInterrupt: 

In [None]:
#21:23 - 21:30

In [8]:
# Manual checkpoint: persist the current weights of `net` (e.g. after the
# training cell was stopped with a KeyboardInterrupt). The file name embeds the
# iteration counter and the running averages of the total loss and of the
# target loss.
#
# Guard the averages against a zero divisor: the training log shows the running
# averages restarting periodically, so `ite_num4val` is presumably reset to 0 at
# those points (TODO confirm against the training cell). Interrupting right
# after such a reset would otherwise raise ZeroDivisionError here and the
# weights would not be saved. For any positive counter the result is unchanged.
ckpt_divisor = max(ite_num4val, 1)

# NOTE: "%3f" is a minimum field width of 3 (values print with the default six
# decimals), not a precision of 3. It is kept as-is so that file names stay
# consistent with checkpoints written elsewhere using the same pattern.
torch.save(
    net.state_dict(),
    model_dir + "basnet_bsi_itr_%d_train_%3f_tar_%3f.pth" % (
        ite_num,
        running_loss / ckpt_divisor,
        running_tar_loss / ckpt_divisor,
    ),
)