In [None]:
import os
import sys
import time
import random
import string

import torch
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.optim as optim
import torch.utils.data
import numpy as np
import torch.nn.functional as F
from nltk.metrics.distance import edit_distance

from utils import CTCLabelConverter, CTCLabelConverterForBaiduWarpctc, AttnLabelConverter, Averager
from dataset import custom_dataset,AlignCollate
from model import Model

import easydict
# Run everything on the GPU when one is visible to PyTorch, otherwise on CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Experiment configuration, exposed as attribute-style options (opt.lr, ...).
# Options not annotated below are not read anywhere in this script;
# presumably they are consumed by Model(opt) / the data pipeline -- TODO confirm.
opt = easydict.EasyDict(dict(
    exp_name="test_01",
    train_data="/data/data/STARN/data_lmdb_release/training",
    valid_data="/data/data/STARN/data_lmdb_release/validation",
    manualSeed=1111,  # seeds random / numpy / torch in the main section
    workers=8,  # DataLoader workers (scaled by GPU count when multi-GPU)
    batch_size=1024,  # DataLoader batch size (scaled by GPU count when multi-GPU)
    num_iter=300000,
    valInterval=1,
    saved_model='',
    FT=False,
    adam=False,  # True -> Adam optimizer, False -> Adadelta
    lr=1,  # optimizer learning rate
    beta1=0.9,
    rho=0.95,  # Adadelta rho
    eps=1e-8,  # Adadelta eps
    grad_clip=5,  # max gradient norm used for clipping
    baiduCTC=False,
    select_data='ST',
    batch_ratio='1',
    total_data_usage_ratio='1.0',
    batch_max_length=25,  # label length passed to converter.encode / prediction
    imgW=100,  # passed to AlignCollate
    imgH=32,  # passed to AlignCollate
    rgb=False,  # True switches input_channel to 3 in the main section
    character="0123456789abcdefghijklmnopqrstuvwxyz",  # replaced from ./ch_range.txt in main
    sensitive=False,
    PAD=False,  # passed to AlignCollate as keep_ratio_with_pad
    data_filtering_off=False,
    Transformation="TPS",
    FeatureExtraction="ResNet",
    SequenceModeling="BiLSTM",
    Prediction='Attn',  # validation() branches on 'CTC' / 'Attn' in this value
    num_fiducial=20,
    input_channel=1,
    output_channel=512,
    hidden_size=256,
))





def validation(model, criterion, evaluation_loader, converter, opt):
    """Evaluate ``model`` on every batch of ``evaluation_loader``.

    For each batch the model is run with a zero-filled decoder input, the loss
    is computed with ``criterion`` against the encoded labels, the predictions
    are greedily decoded (arg-max per step) and compared with the ground truth.

    Args:
        model: callable network; called as ``model(image, text)`` for CTC and
            ``model(image, text, is_train=False)`` otherwise.
        criterion: loss callable matching ``opt.Prediction`` (CTC loss or a
            classification loss over flattened steps).
        evaluation_loader: iterable yielding ``(image_tensors, labels)`` batches.
        converter: label converter providing ``encode`` and ``decode``.
        opt: options object; reads ``batch_max_length``, ``Prediction`` and
            ``baiduCTC``.

    Returns:
        tuple: ``(valid_loss, accuracy, norm_ED, preds_str,
        confidence_score_list, labels, infer_time, length_of_data)`` where

        * ``valid_loss`` is ``Averager.val()`` over all batch losses,
        * ``accuracy`` is the exact-match percentage over all samples,
        * ``norm_ED`` is the mean ICDAR2019 normalized edit-distance score,
        * ``preds_str``, ``confidence_score_list`` and ``labels`` belong to
          the LAST batch only (they are overwritten on every batch),
        * ``infer_time`` is the summed forward-pass wall time in seconds,
        * ``length_of_data`` is the number of evaluated samples.

    Raises:
        ZeroDivisionError: if ``evaluation_loader`` yields no samples.
    """
    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()
    uses_attention = 'Attn' in opt.Prediction

    for image_tensors, labels in evaluation_loader:
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        image = image_tensors.to(device)
        # For max length prediction
        length_for_pred = torch.IntTensor([opt.batch_max_length] * batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size, opt.batch_max_length + 1).fill_(0).to(device)

        text_for_loss, length_for_loss = converter.encode(labels, batch_max_length=opt.batch_max_length)

        start_time = time.time()
        if 'CTC' in opt.Prediction:
            preds = model(image, text_for_pred)
            forward_time = time.time() - start_time

            # Calculate evaluation loss for CTC decoder.
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            # permute 'preds' to use CTCloss format
            if opt.baiduCTC:
                cost = criterion(preds.permute(1, 0, 2), text_for_loss, preds_size, length_for_loss) / batch_size
            else:
                cost = criterion(preds.log_softmax(2).permute(1, 0, 2), text_for_loss, preds_size, length_for_loss)

            # Select max probability (greedy decoding) then decode index to character.
            _, preds_index = preds.max(2)
            if opt.baiduCTC:
                # the Baidu warp-ctc converter is fed a flattened index tensor
                preds_index = preds_index.view(-1)
            preds_str = converter.decode(preds_index.data, preds_size.data)

        else:
            preds = model(image, text_for_pred, is_train=False)
            forward_time = time.time() - start_time

            preds = preds[:, :text_for_loss.shape[1] - 1, :]
            target = text_for_loss[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.contiguous().view(-1, preds.shape[-1]), target.contiguous().view(-1))

            # Select max probability (greedy decoding) then decode index to character.
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)
            labels = converter.decode(text_for_loss[:, 1:], length_for_loss)

        infer_time += forward_time
        valid_loss_avg.add(cost)

        # calculate accuracy & confidence score
        preds_prob = F.softmax(preds, dim=2)
        preds_max_prob, _ = preds_prob.max(dim=2)
        confidence_score_list = []
        for gt, pred, pred_max_prob in zip(labels, preds_str, preds_max_prob):
            if uses_attention:
                # Prune everything from the "end of sentence" token ([s]) on.
                # str.find returns -1 when the token is absent; slicing with
                # -1 would silently drop the last character, so only prune
                # when the token was actually found.
                gt = gt.split('[s]', 1)[0]
                pred_EOS = pred.find('[s]')
                if pred_EOS != -1:
                    pred = pred[:pred_EOS]
                    pred_max_prob = pred_max_prob[:pred_EOS]

            if pred == gt:
                n_correct += 1

            # (old version) ICDAR2017 DOST Normalized Edit Distance
            # https://rrc.cvc.uab.es/?ch=7&com=tasks normalized by len(gt) only:
            #     norm_ED += 1 if len(gt) == 0 else edit_distance(pred, gt) / len(gt)

            # ICDAR2019 Normalized Edit Distance: an empty gt or pred scores 0,
            # otherwise 1 - distance / (length of the longer string).
            if gt and pred:
                norm_ED += 1 - edit_distance(pred, gt) / max(len(gt), len(pred))

            # Confidence score = product of the per-step max probabilities.
            if pred_max_prob.numel() > 0:
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
            else:
                # empty prediction after pruning at the [s] token
                confidence_score = 0
            confidence_score_list.append(confidence_score)

    accuracy = n_correct / float(length_of_data) * 100
    norm_ED = norm_ED / float(length_of_data)  # ICDAR2019 Normalized Edit Distance

    return valid_loss_avg.val(), accuracy, norm_ED, preds_str, confidence_score_list, labels, infer_time, length_of_data




if __name__ == '__main__':
    # Training entry point: seeds the RNGs, builds the data loaders, model,
    # loss, optimizer and LR scheduler, then trains while periodically running
    # validation() and appending the results to ./log_train.txt / ./log_val.txt.

    # ---- Seed and GPU setting ----
    random.seed(opt.manualSeed)
    np.random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    torch.cuda.manual_seed(opt.manualSeed)

    # NOTE(review): benchmark=True lets cuDNN auto-select algorithms, which may
    # work against the reproducibility requested by deterministic=True -- confirm intent.
    cudnn.benchmark = True
    cudnn.deterministic = True
    opt.num_gpu = torch.cuda.device_count()
    if opt.num_gpu > 1:
        print('------ Use multi-GPU setting ------')
        print('if you stuck too long time with multi-GPU setting, try to set --workers 0')
        # check multi-GPU issue https://github.com/clovaai/deep-text-recognition-benchmark/issues/1
        opt.workers = opt.workers * opt.num_gpu
        opt.batch_size = opt.batch_size * opt.num_gpu

    # ---- Character set ----
    # The recognizable characters are read verbatim from a text file and
    # replace the default opt.character.
    # NOTE(review): the file is decoded with the platform default encoding;
    # pass encoding=... explicitly once the file's encoding is confirmed.
    numclass_path = "./ch_range.txt"
    with open(numclass_path, 'r') as f:
        opt.character = f.read()

    converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    # ---- Data ----
    train_dataset = custom_dataset("./dict/nia_refine_concat.txt", "./font_full", "train")
    valid_dataset = custom_dataset("./dict/nia_refine_concat.txt", "./font", "valid")

    # The same collate function is shared by the train and validation loaders.
    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=opt.batch_size,
        shuffle=True,
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)

    # ---- Model ----
    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)

    # Weight initialization: biases -> 0, weights -> Kaiming normal.
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:
            # Kaiming init fails for some parameters (e.g. batchnorm weights);
            # those weights are set to 1 instead.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    model = torch.nn.DataParallel(model).to(device)

    # ---- Loss / optimizer / scheduler ----
    # Target index 0 is excluded from the loss.
    criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)
    loss_avg = Averager()

    # Only optimize parameters that require gradients.
    filtered_parameters = [p for p in model.parameters() if p.requires_grad]

    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr)
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps)

    # The scheduler is stepped once per training iteration (not per epoch),
    # so step_size counts iterations.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=360, gamma=0.98)

    nb_epochs = 100000

    # Validate/log every `val_interval` training iterations. With the
    # configured valInterval of 1 this happens after every batch.
    val_interval = max(1, int(opt.valInterval))
    iteration = 0

    # Loop-invariant pieces of the prediction table written to the log.
    dashed_line = '-' * 80
    head = f'{"Ground Truth":25s} | {"Prediction":25s} | Confidence Score & T/F'

    # ---- Training loop ----
    for epoch in range(nb_epochs + 1):

        for batch_idx, samples in enumerate(train_loader):
            start_time = time.time()
            model.train()

            image_tensors, labels = samples
            image = image_tensors.to(device)
            text, length = converter.encode(labels, batch_max_length=opt.batch_max_length)

            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]), target.contiguous().view(-1))

            model.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
            optimizer.step()
            scheduler.step()
            loss_avg.add(cost)
            iteration += 1

            if iteration % val_interval != 0:
                continue

            # Learning rate of the last parameter group, for logging.
            learning_rate_val = optimizer.param_groups[-1]['lr']

            # Evaluation
            model.eval()
            with torch.no_grad():
                (valid_loss, current_accuracy, current_norm_ED, valid_preds,
                 confidence_score, valid_labels, infer_time, length_of_data) = validation(
                    model, criterion, valid_loader, converter, opt)

            end = time.time()
            loss_log = f'epoch : {epoch} [{batch_idx}/{len(train_loader)}] Train loss: {loss_avg.val():0.5f},Valid loss: {valid_loss:0.5f}, time : {end-start_time} lr : {learning_rate_val}'
            loss_avg.reset()

            print(loss_log)

            # Show the first few validation samples of the last validated batch.
            predicted_result_log = f'{dashed_line}\n{head}\n{dashed_line}\n'
            for gt, pred, confidence in zip(valid_labels[:5], valid_preds[:5], confidence_score[:5]):
                if 'Attn' in opt.Prediction:
                    # Keep the text before the end-of-sentence token; when the
                    # token is absent the string is kept whole (slicing with
                    # find()'s -1 would drop the last character).
                    gt = gt.split('[s]', 1)[0]
                    pred = pred.split('[s]', 1)[0]

                predicted_result_log += f'{gt:25s} | {pred:25s} | {confidence:0.4f}\t{str(pred == gt)}\n'
            predicted_result_log += f'{dashed_line}'

            # Open the logs only for the duration of the write so the handles
            # are released even if a later iteration fails.
            with open('./log_train.txt', 'a') as log2:
                log2.write(loss_log + '\n')
            with open('./log_val.txt', 'a') as log:
                log.write(loss_log + '\n')
                log.write(predicted_result_log + '\n')

------ Use multi-GPU setting ------
if you stuck too long time with multi-GPU setting, try to set --workers 0
Skip Transformation.LocalizationNetwork.localization_fc2.weight as it is already initialized
Skip Transformation.LocalizationNetwork.localization_fc2.bias as it is already initialized
epoch : 0 [0/21279] Train loss: 8.01477,Valid loss: 7.96785, time : 43.055087089538574 lr : 1
epoch : 0 [1/21279] Train loss: 7.90191,Valid loss: 7.95940, time : 10.63684606552124 lr : 1
epoch : 0 [2/21279] Train loss: 7.76160,Valid loss: 7.92126, time : 10.193422079086304 lr : 1
epoch : 0 [3/21279] Train loss: 7.57548,Valid loss: 7.62940, time : 10.5014328956604 lr : 1
epoch : 0 [4/21279] Train loss: 7.32230,Valid loss: 7.44721, time : 10.221692562103271 lr : 1
epoch : 0 [5/21279] Train loss: 7.04555,Valid loss: 6.98430, time : 10.762374639511108 lr : 1
epoch : 0 [6/21279] Train loss: 6.85669,Valid loss: 6.81894, time : 10.311545372009277 lr : 1
epoch : 0 [7/21279] Train loss: 6.76436,Valid loss:

epoch : 0 [84/21279] Train loss: 5.41221,Valid loss: 5.49292, time : 11.272991418838501 lr : 1
epoch : 0 [85/21279] Train loss: 5.40791,Valid loss: 5.43533, time : 11.626311779022217 lr : 1
epoch : 0 [86/21279] Train loss: 5.34941,Valid loss: 5.42027, time : 11.131417751312256 lr : 1
epoch : 0 [87/21279] Train loss: 5.35101,Valid loss: 5.40940, time : 11.41591501235962 lr : 1
epoch : 0 [88/21279] Train loss: 5.33876,Valid loss: 5.40374, time : 11.632770776748657 lr : 1
epoch : 0 [89/21279] Train loss: 5.34406,Valid loss: 5.39797, time : 12.163928270339966 lr : 1
epoch : 0 [90/21279] Train loss: 5.34067,Valid loss: 5.43872, time : 11.281046152114868 lr : 1
epoch : 0 [91/21279] Train loss: 5.39648,Valid loss: 5.46886, time : 11.134479999542236 lr : 1
epoch : 0 [92/21279] Train loss: 5.39831,Valid loss: 5.46828, time : 11.319393634796143 lr : 1
epoch : 0 [93/21279] Train loss: 5.38359,Valid loss: 5.43556, time : 13.137430667877197 lr : 1
epoch : 0 [94/21279] Train loss: 5.37351,Valid loss

epoch : 0 [170/21279] Train loss: 5.28152,Valid loss: 5.33538, time : 11.165393114089966 lr : 1
epoch : 0 [171/21279] Train loss: 5.28371,Valid loss: 5.35307, time : 11.623157501220703 lr : 1
epoch : 0 [172/21279] Train loss: 5.30113,Valid loss: 5.35610, time : 11.106384754180908 lr : 1
epoch : 0 [173/21279] Train loss: 5.35670,Valid loss: 5.36009, time : 11.06220817565918 lr : 1
epoch : 0 [174/21279] Train loss: 5.32081,Valid loss: 5.32951, time : 11.359291076660156 lr : 1
epoch : 0 [175/21279] Train loss: 5.27810,Valid loss: 5.34589, time : 13.612498998641968 lr : 1
epoch : 0 [176/21279] Train loss: 5.27699,Valid loss: 5.31777, time : 11.092890739440918 lr : 1
epoch : 0 [177/21279] Train loss: 5.27325,Valid loss: 5.36933, time : 10.929848194122314 lr : 1
epoch : 0 [178/21279] Train loss: 5.26845,Valid loss: 5.39074, time : 11.782737016677856 lr : 1
epoch : 0 [179/21279] Train loss: 5.30323,Valid loss: 5.41977, time : 11.316992998123169 lr : 1
epoch : 0 [180/21279] Train loss: 5.29604

epoch : 0 [256/21279] Train loss: 5.23680,Valid loss: 5.34691, time : 11.953730344772339 lr : 1
epoch : 0 [257/21279] Train loss: 5.23490,Valid loss: 5.34544, time : 14.809532880783081 lr : 1
epoch : 0 [258/21279] Train loss: 5.23347,Valid loss: 5.29346, time : 11.987007856369019 lr : 1
epoch : 0 [259/21279] Train loss: 5.24956,Valid loss: 5.29678, time : 11.74241328239441 lr : 1
epoch : 0 [260/21279] Train loss: 5.26004,Valid loss: 5.29438, time : 11.420980215072632 lr : 1
epoch : 0 [261/21279] Train loss: 5.27535,Valid loss: 5.27825, time : 11.69792890548706 lr : 1
epoch : 0 [262/21279] Train loss: 5.23707,Valid loss: 5.26438, time : 12.20431113243103 lr : 1
epoch : 0 [263/21279] Train loss: 5.22464,Valid loss: 5.29147, time : 11.829164743423462 lr : 1
epoch : 0 [264/21279] Train loss: 5.23387,Valid loss: 5.31997, time : 12.200077772140503 lr : 1
epoch : 0 [265/21279] Train loss: 5.29482,Valid loss: 5.31793, time : 10.906011819839478 lr : 1
epoch : 0 [266/21279] Train loss: 5.25193,V

epoch : 0 [342/21279] Train loss: 5.19425,Valid loss: 5.27383, time : 11.702513694763184 lr : 1
epoch : 0 [343/21279] Train loss: 5.19970,Valid loss: 5.25378, time : 11.35193419456482 lr : 1
epoch : 0 [344/21279] Train loss: 5.18005,Valid loss: 5.21135, time : 12.01891279220581 lr : 1
epoch : 0 [345/21279] Train loss: 5.17978,Valid loss: 5.29264, time : 11.769244909286499 lr : 1
epoch : 0 [346/21279] Train loss: 5.18000,Valid loss: 5.25759, time : 11.356447696685791 lr : 1
epoch : 0 [347/21279] Train loss: 5.22911,Valid loss: 5.27330, time : 12.079937219619751 lr : 1
epoch : 0 [348/21279] Train loss: 5.17796,Valid loss: 5.21624, time : 11.618089199066162 lr : 1
epoch : 0 [349/21279] Train loss: 5.16843,Valid loss: 5.26767, time : 11.781020641326904 lr : 1
epoch : 0 [350/21279] Train loss: 5.18131,Valid loss: 5.24291, time : 12.240309476852417 lr : 1
epoch : 0 [351/21279] Train loss: 5.24084,Valid loss: 5.28639, time : 13.618338584899902 lr : 1
epoch : 0 [352/21279] Train loss: 5.20673,

epoch : 0 [428/21279] Train loss: 5.11322,Valid loss: 5.23347, time : 11.591976642608643 lr : 1
epoch : 0 [429/21279] Train loss: 5.17204,Valid loss: 5.26625, time : 11.33017635345459 lr : 1
epoch : 0 [430/21279] Train loss: 5.11750,Valid loss: 5.17706, time : 11.976336479187012 lr : 1
epoch : 0 [431/21279] Train loss: 5.13284,Valid loss: 5.21061, time : 12.367023229598999 lr : 1
epoch : 0 [432/21279] Train loss: 5.10311,Valid loss: 5.14759, time : 11.352062463760376 lr : 1
epoch : 0 [433/21279] Train loss: 5.14115,Valid loss: 5.21481, time : 11.484602689743042 lr : 1
epoch : 0 [434/21279] Train loss: 5.10290,Valid loss: 5.19289, time : 11.47683835029602 lr : 1
epoch : 0 [435/21279] Train loss: 5.16404,Valid loss: 5.22747, time : 12.90763545036316 lr : 1
epoch : 0 [436/21279] Train loss: 5.12536,Valid loss: 5.19577, time : 11.468342542648315 lr : 1
epoch : 0 [437/21279] Train loss: 5.10974,Valid loss: 5.24114, time : 11.501582145690918 lr : 1
epoch : 0 [438/21279] Train loss: 5.09294,V

epoch : 0 [514/21279] Train loss: 5.07555,Valid loss: 5.07585, time : 11.43490982055664 lr : 0.99
epoch : 0 [515/21279] Train loss: 5.02315,Valid loss: 5.16638, time : 11.729246854782104 lr : 0.99
epoch : 0 [516/21279] Train loss: 5.05295,Valid loss: 5.15610, time : 11.11424994468689 lr : 0.99
epoch : 0 [517/21279] Train loss: 5.03698,Valid loss: 5.19962, time : 11.893129587173462 lr : 0.99
epoch : 0 [518/21279] Train loss: 5.09268,Valid loss: 5.10049, time : 12.879006385803223 lr : 0.99
epoch : 0 [519/21279] Train loss: 5.00213,Valid loss: 5.09368, time : 11.312658071517944 lr : 0.99
epoch : 0 [520/21279] Train loss: 4.99637,Valid loss: 5.18923, time : 11.645411014556885 lr : 0.99
epoch : 0 [521/21279] Train loss: 4.99414,Valid loss: 5.09800, time : 10.901386499404907 lr : 0.99
epoch : 0 [522/21279] Train loss: 5.01575,Valid loss: 5.34819, time : 11.511387586593628 lr : 0.99
epoch : 0 [523/21279] Train loss: 5.01855,Valid loss: 5.53032, time : 11.749949216842651 lr : 0.99
epoch : 0 [5

epoch : 0 [597/21279] Train loss: 4.90127,Valid loss: 5.02112, time : 11.477890968322754 lr : 0.99
epoch : 0 [598/21279] Train loss: 4.85821,Valid loss: 4.97374, time : 11.706592798233032 lr : 0.99
epoch : 0 [599/21279] Train loss: 4.87447,Valid loss: 5.08272, time : 13.371514797210693 lr : 0.99
epoch : 0 [600/21279] Train loss: 4.89617,Valid loss: 5.04748, time : 11.541189908981323 lr : 0.99
epoch : 0 [601/21279] Train loss: 4.97519,Valid loss: 5.00829, time : 12.695217847824097 lr : 0.99
epoch : 0 [602/21279] Train loss: 4.89574,Valid loss: 4.93689, time : 11.205849170684814 lr : 0.99
epoch : 0 [603/21279] Train loss: 4.88657,Valid loss: 5.02213, time : 11.40613842010498 lr : 0.99
epoch : 0 [604/21279] Train loss: 4.88344,Valid loss: 5.03858, time : 11.02358341217041 lr : 0.99
epoch : 0 [605/21279] Train loss: 4.92624,Valid loss: 5.00324, time : 11.716572523117065 lr : 0.99
epoch : 0 [606/21279] Train loss: 4.92211,Valid loss: 5.04312, time : 11.395732402801514 lr : 0.99
epoch : 0 [6

epoch : 0 [681/21279] Train loss: 4.76395,Valid loss: 4.89089, time : 13.364815473556519 lr : 0.99
epoch : 0 [682/21279] Train loss: 4.72915,Valid loss: 4.92231, time : 11.838987350463867 lr : 0.99
epoch : 0 [683/21279] Train loss: 4.72796,Valid loss: 4.90003, time : 12.177138328552246 lr : 0.99
epoch : 0 [684/21279] Train loss: 4.76672,Valid loss: 4.98018, time : 12.371362686157227 lr : 0.99
epoch : 0 [685/21279] Train loss: 4.75261,Valid loss: 5.03607, time : 12.422059535980225 lr : 0.99
epoch : 0 [686/21279] Train loss: 4.80216,Valid loss: 4.99377, time : 11.944973945617676 lr : 0.99
epoch : 0 [687/21279] Train loss: 4.76695,Valid loss: 5.00526, time : 12.215057373046875 lr : 0.99
epoch : 0 [688/21279] Train loss: 4.74145,Valid loss: 4.97978, time : 12.045120477676392 lr : 0.99
epoch : 0 [689/21279] Train loss: 4.74605,Valid loss: 5.10533, time : 12.57434630393982 lr : 0.99
epoch : 0 [690/21279] Train loss: 4.78919,Valid loss: 5.05186, time : 12.539033889770508 lr : 0.99
epoch : 0 [

epoch : 0 [764/21279] Train loss: 4.66535,Valid loss: 4.95279, time : 12.148228406906128 lr : 0.99
epoch : 0 [765/21279] Train loss: 4.67170,Valid loss: 4.89385, time : 15.0916907787323 lr : 0.99
epoch : 0 [766/21279] Train loss: 4.66745,Valid loss: 4.91932, time : 12.13465929031372 lr : 0.99
epoch : 0 [767/21279] Train loss: 4.66964,Valid loss: 4.88335, time : 12.14516282081604 lr : 0.99
epoch : 0 [768/21279] Train loss: 4.66958,Valid loss: 4.83008, time : 11.661271572113037 lr : 0.99
epoch : 0 [769/21279] Train loss: 4.61121,Valid loss: 4.85644, time : 11.53825330734253 lr : 0.99
epoch : 0 [770/21279] Train loss: 4.61954,Valid loss: 4.83066, time : 11.700281620025635 lr : 0.99
epoch : 0 [771/21279] Train loss: 4.63412,Valid loss: 4.89565, time : 11.78589391708374 lr : 0.99
epoch : 0 [772/21279] Train loss: 4.63214,Valid loss: 4.95313, time : 11.704999685287476 lr : 0.99
epoch : 0 [773/21279] Train loss: 4.71529,Valid loss: 4.79951, time : 12.16714859008789 lr : 0.99
epoch : 0 [774/21

epoch : 0 [847/21279] Train loss: 4.48605,Valid loss: 4.79699, time : 14.825771570205688 lr : 0.99
epoch : 0 [848/21279] Train loss: 4.51176,Valid loss: 4.74874, time : 12.116151094436646 lr : 0.99
epoch : 0 [849/21279] Train loss: 4.53311,Valid loss: 4.86619, time : 12.264716386795044 lr : 0.99
epoch : 0 [850/21279] Train loss: 4.56540,Valid loss: 4.81469, time : 11.798486948013306 lr : 0.99
epoch : 0 [851/21279] Train loss: 4.54449,Valid loss: 4.83404, time : 11.370555877685547 lr : 0.99
epoch : 0 [852/21279] Train loss: 4.54550,Valid loss: 4.92144, time : 11.743183851242065 lr : 0.99
epoch : 0 [853/21279] Train loss: 4.59577,Valid loss: 4.75182, time : 11.392359018325806 lr : 0.99
epoch : 0 [854/21279] Train loss: 4.55297,Valid loss: 4.78440, time : 11.805839776992798 lr : 0.99
epoch : 0 [855/21279] Train loss: 4.50317,Valid loss: 4.69580, time : 12.088062763214111 lr : 0.99
epoch : 0 [856/21279] Train loss: 4.50418,Valid loss: 4.74251, time : 11.67400860786438 lr : 0.99
epoch : 0 [

epoch : 0 [930/21279] Train loss: 4.37952,Valid loss: 4.76866, time : 12.301211595535278 lr : 0.99
epoch : 0 [931/21279] Train loss: 4.45861,Valid loss: 4.74076, time : 12.349539756774902 lr : 0.99
epoch : 0 [932/21279] Train loss: 4.36922,Valid loss: 4.61306, time : 12.458149671554565 lr : 0.99
epoch : 0 [933/21279] Train loss: 4.37621,Valid loss: 4.67542, time : 12.403028726577759 lr : 0.99
epoch : 0 [934/21279] Train loss: 4.39496,Valid loss: 4.72812, time : 12.492802381515503 lr : 0.99
epoch : 0 [935/21279] Train loss: 4.45538,Valid loss: 4.67772, time : 12.581629753112793 lr : 0.99
epoch : 0 [936/21279] Train loss: 4.37401,Valid loss: 4.64517, time : 11.836921215057373 lr : 0.99
epoch : 0 [937/21279] Train loss: 4.36083,Valid loss: 4.62670, time : 11.885070562362671 lr : 0.99
epoch : 0 [938/21279] Train loss: 4.33637,Valid loss: 4.68059, time : 11.8229660987854 lr : 0.99
epoch : 0 [939/21279] Train loss: 4.34716,Valid loss: 4.72593, time : 12.516276836395264 lr : 0.99
epoch : 0 [9

epoch : 0 [1013/21279] Train loss: 4.20017,Valid loss: 4.53295, time : 12.752357721328735 lr : 0.9801
epoch : 0 [1014/21279] Train loss: 4.18680,Valid loss: 4.42982, time : 12.576718807220459 lr : 0.9801
epoch : 0 [1015/21279] Train loss: 4.18361,Valid loss: 4.60114, time : 12.155163764953613 lr : 0.9801
epoch : 0 [1016/21279] Train loss: 4.20166,Valid loss: 4.58365, time : 12.003170251846313 lr : 0.9801
epoch : 0 [1017/21279] Train loss: 4.24715,Valid loss: 4.83136, time : 12.553152322769165 lr : 0.9801
epoch : 0 [1018/21279] Train loss: 4.43388,Valid loss: 4.65443, time : 12.345698118209839 lr : 0.9801
epoch : 0 [1019/21279] Train loss: 4.30320,Valid loss: 4.70717, time : 12.049137592315674 lr : 0.9801
epoch : 0 [1020/21279] Train loss: 4.32904,Valid loss: 4.55457, time : 12.334321975708008 lr : 0.9801
epoch : 0 [1021/21279] Train loss: 4.24966,Valid loss: 4.79372, time : 12.025235652923584 lr : 0.9801
epoch : 0 [1022/21279] Train loss: 4.31353,Valid loss: 4.69875, time : 12.47657966

epoch : 0 [1094/21279] Train loss: 4.14991,Valid loss: 4.64955, time : 14.925849437713623 lr : 0.9801
epoch : 0 [1095/21279] Train loss: 4.14725,Valid loss: 4.39892, time : 12.555384635925293 lr : 0.9801
epoch : 0 [1096/21279] Train loss: 4.09396,Valid loss: 4.60786, time : 12.393287420272827 lr : 0.9801
epoch : 0 [1097/21279] Train loss: 4.10718,Valid loss: 4.44913, time : 11.588823795318604 lr : 0.9801
epoch : 0 [1098/21279] Train loss: 4.15120,Valid loss: 4.66338, time : 12.403114318847656 lr : 0.9801
epoch : 0 [1099/21279] Train loss: 4.15626,Valid loss: 4.36918, time : 12.570399045944214 lr : 0.9801
epoch : 0 [1100/21279] Train loss: 4.05871,Valid loss: 4.21812, time : 12.272849321365356 lr : 0.9801
epoch : 0 [1101/21279] Train loss: 4.03052,Valid loss: 4.57328, time : 12.476317167282104 lr : 0.9801
epoch : 0 [1102/21279] Train loss: 4.08744,Valid loss: 4.55236, time : 12.034842729568481 lr : 0.9801
epoch : 0 [1103/21279] Train loss: 4.20607,Valid loss: 4.37129, time : 12.21700692

epoch : 0 [1175/21279] Train loss: 4.07538,Valid loss: 4.95509, time : 12.82300329208374 lr : 0.9801
epoch : 0 [1176/21279] Train loss: 4.02922,Valid loss: 4.52468, time : 12.460582256317139 lr : 0.9801
epoch : 0 [1177/21279] Train loss: 3.98009,Valid loss: 4.44999, time : 13.908876180648804 lr : 0.9801
epoch : 0 [1178/21279] Train loss: 3.90821,Valid loss: 4.20476, time : 12.043365001678467 lr : 0.9801
epoch : 0 [1179/21279] Train loss: 3.85936,Valid loss: 4.15587, time : 12.615407943725586 lr : 0.9801
epoch : 0 [1180/21279] Train loss: 3.84092,Valid loss: 4.12375, time : 12.498673915863037 lr : 0.9801
epoch : 0 [1181/21279] Train loss: 3.79951,Valid loss: 4.23350, time : 12.282057523727417 lr : 0.9801
epoch : 0 [1182/21279] Train loss: 3.83258,Valid loss: 4.29029, time : 12.467796564102173 lr : 0.9801
epoch : 0 [1183/21279] Train loss: 3.87342,Valid loss: 4.38464, time : 12.578464031219482 lr : 0.9801
epoch : 0 [1184/21279] Train loss: 3.87940,Valid loss: 4.40553, time : 12.649717807

epoch : 0 [1256/21279] Train loss: 3.70820,Valid loss: 4.01986, time : 12.60851263999939 lr : 0.9801
epoch : 0 [1257/21279] Train loss: 3.64665,Valid loss: 3.93333, time : 13.032427549362183 lr : 0.9801
epoch : 0 [1258/21279] Train loss: 3.62463,Valid loss: 3.94344, time : 14.628081321716309 lr : 0.9801
epoch : 0 [1259/21279] Train loss: 3.61097,Valid loss: 3.93756, time : 12.62645673751831 lr : 0.9801
epoch : 0 [1260/21279] Train loss: 3.64998,Valid loss: 4.08627, time : 12.612124681472778 lr : 0.9801
epoch : 0 [1261/21279] Train loss: 3.61993,Valid loss: 3.93935, time : 12.896236419677734 lr : 0.9801
epoch : 0 [1262/21279] Train loss: 3.64113,Valid loss: 4.28620, time : 12.69379448890686 lr : 0.9801
epoch : 0 [1263/21279] Train loss: 3.63566,Valid loss: 4.26213, time : 12.798458576202393 lr : 0.9801
epoch : 0 [1264/21279] Train loss: 3.74840,Valid loss: 4.44477, time : 12.345874547958374 lr : 0.9801
epoch : 0 [1265/21279] Train loss: 3.85944,Valid loss: 3.99501, time : 13.02524137496

epoch : 0 [1337/21279] Train loss: 3.45682,Valid loss: 3.94995, time : 12.261680603027344 lr : 0.9801
epoch : 0 [1338/21279] Train loss: 3.51501,Valid loss: 3.98116, time : 11.8374662399292 lr : 0.9801
epoch : 0 [1339/21279] Train loss: 3.54026,Valid loss: 4.17807, time : 11.472371101379395 lr : 0.9801
epoch : 0 [1340/21279] Train loss: 3.69295,Valid loss: 3.85387, time : 13.863228559494019 lr : 0.9801
epoch : 0 [1341/21279] Train loss: 3.46196,Valid loss: 3.98392, time : 11.797799587249756 lr : 0.9801
epoch : 0 [1342/21279] Train loss: 3.48098,Valid loss: 3.83292, time : 12.442875385284424 lr : 0.9801
epoch : 0 [1343/21279] Train loss: 3.49105,Valid loss: 4.08576, time : 11.716596364974976 lr : 0.9801
epoch : 0 [1344/21279] Train loss: 3.53653,Valid loss: 3.91688, time : 11.987570762634277 lr : 0.9801
epoch : 0 [1345/21279] Train loss: 3.41783,Valid loss: 4.07006, time : 11.649301052093506 lr : 0.9801
epoch : 0 [1346/21279] Train loss: 3.42253,Valid loss: 4.08300, time : 11.7543673515

epoch : 0 [1418/21279] Train loss: 3.31261,Valid loss: 4.02894, time : 12.231131553649902 lr : 0.9801
epoch : 0 [1419/21279] Train loss: 3.49420,Valid loss: 3.86497, time : 11.838268995285034 lr : 0.9801
epoch : 0 [1420/21279] Train loss: 3.24169,Valid loss: 3.76659, time : 11.989887475967407 lr : 0.9801
epoch : 0 [1421/21279] Train loss: 3.22349,Valid loss: 3.56756, time : 12.219864130020142 lr : 0.9801
epoch : 0 [1422/21279] Train loss: 3.17076,Valid loss: 3.69608, time : 12.097031831741333 lr : 0.9801
epoch : 0 [1423/21279] Train loss: 3.18776,Valid loss: 3.52428, time : 12.225686311721802 lr : 0.9801
epoch : 0 [1424/21279] Train loss: 3.19177,Valid loss: 3.48285, time : 14.030378818511963 lr : 0.9801
epoch : 0 [1425/21279] Train loss: 3.13071,Valid loss: 3.39529, time : 11.734416723251343 lr : 0.9801
epoch : 0 [1426/21279] Train loss: 3.09261,Valid loss: 3.48342, time : 12.130078077316284 lr : 0.9801
epoch : 0 [1427/21279] Train loss: 3.07899,Valid loss: 3.41676, time : 12.13359117

epoch : 0 [1499/21279] Train loss: 3.21080,Valid loss: 4.89626, time : 12.395850419998169 lr : 0.9702989999999999
epoch : 0 [1500/21279] Train loss: 3.37350,Valid loss: 3.69204, time : 12.352985620498657 lr : 0.9702989999999999
epoch : 0 [1501/21279] Train loss: 3.06176,Valid loss: 3.51403, time : 12.509890079498291 lr : 0.9702989999999999
epoch : 0 [1502/21279] Train loss: 2.97944,Valid loss: 3.37265, time : 12.232416152954102 lr : 0.9702989999999999
epoch : 0 [1503/21279] Train loss: 2.91213,Valid loss: 3.19336, time : 12.617674589157104 lr : 0.9702989999999999
epoch : 0 [1504/21279] Train loss: 2.91532,Valid loss: 3.39419, time : 12.904682159423828 lr : 0.9702989999999999
epoch : 0 [1505/21279] Train loss: 2.91325,Valid loss: 3.23332, time : 12.851622819900513 lr : 0.9702989999999999
epoch : 0 [1506/21279] Train loss: 2.90464,Valid loss: 3.33870, time : 13.04464602470398 lr : 0.9702989999999999
epoch : 0 [1507/21279] Train loss: 2.92001,Valid loss: 3.28997, time : 14.741681814193726

epoch : 0 [1571/21279] Train loss: 2.83777,Valid loss: 3.74481, time : 12.56140398979187 lr : 0.9702989999999999
epoch : 0 [1572/21279] Train loss: 2.99013,Valid loss: 3.14669, time : 12.793037176132202 lr : 0.9702989999999999
epoch : 0 [1573/21279] Train loss: 2.84518,Valid loss: 3.31975, time : 13.04988169670105 lr : 0.9702989999999999
epoch : 0 [1574/21279] Train loss: 2.79162,Valid loss: 3.21466, time : 12.938713788986206 lr : 0.9702989999999999
epoch : 0 [1575/21279] Train loss: 2.70566,Valid loss: 3.30445, time : 12.618752479553223 lr : 0.9702989999999999
epoch : 0 [1576/21279] Train loss: 2.72364,Valid loss: 3.21305, time : 20.19285297393799 lr : 0.9702989999999999
epoch : 0 [1577/21279] Train loss: 2.72572,Valid loss: 3.30945, time : 12.310718536376953 lr : 0.9702989999999999
epoch : 0 [1578/21279] Train loss: 2.70164,Valid loss: 3.07654, time : 12.52318549156189 lr : 0.9702989999999999
epoch : 0 [1579/21279] Train loss: 2.68680,Valid loss: 3.28044, time : 12.592390060424805 lr

epoch : 0 [1644/21279] Train loss: 2.69780,Valid loss: 3.16299, time : 14.85951852798462 lr : 0.9702989999999999
epoch : 0 [1645/21279] Train loss: 2.63407,Valid loss: 3.11042, time : 12.447931289672852 lr : 0.9702989999999999
epoch : 0 [1646/21279] Train loss: 2.50722,Valid loss: 2.80638, time : 12.498783826828003 lr : 0.9702989999999999
epoch : 0 [1647/21279] Train loss: 2.47490,Valid loss: 2.93815, time : 12.490886449813843 lr : 0.9702989999999999
epoch : 0 [1648/21279] Train loss: 2.52447,Valid loss: 2.86041, time : 12.670697689056396 lr : 0.9702989999999999
epoch : 0 [1649/21279] Train loss: 2.53323,Valid loss: 2.98663, time : 12.463221788406372 lr : 0.9702989999999999
epoch : 0 [1650/21279] Train loss: 2.55881,Valid loss: 2.97110, time : 12.604193449020386 lr : 0.9702989999999999
epoch : 0 [1651/21279] Train loss: 2.52544,Valid loss: 2.89959, time : 12.658018112182617 lr : 0.9702989999999999
epoch : 0 [1652/21279] Train loss: 2.54811,Valid loss: 3.05189, time : 12.571591854095459

epoch : 0 [1716/21279] Train loss: 2.39643,Valid loss: 2.61118, time : 12.908348083496094 lr : 0.9702989999999999
epoch : 0 [1717/21279] Train loss: 2.28626,Valid loss: 2.68190, time : 13.167324542999268 lr : 0.9702989999999999
epoch : 0 [1718/21279] Train loss: 2.25052,Valid loss: 2.66957, time : 12.671744346618652 lr : 0.9702989999999999
epoch : 0 [1719/21279] Train loss: 2.25264,Valid loss: 2.61274, time : 13.027038097381592 lr : 0.9702989999999999
epoch : 0 [1720/21279] Train loss: 2.26367,Valid loss: 2.55642, time : 12.451454401016235 lr : 0.9702989999999999
epoch : 0 [1721/21279] Train loss: 2.23763,Valid loss: 2.75655, time : 12.946502923965454 lr : 0.9702989999999999
epoch : 0 [1722/21279] Train loss: 2.31919,Valid loss: 2.80365, time : 12.731686115264893 lr : 0.9702989999999999
epoch : 0 [1723/21279] Train loss: 2.34370,Valid loss: 2.82562, time : 12.703648090362549 lr : 0.9702989999999999
epoch : 0 [1724/21279] Train loss: 2.37636,Valid loss: 2.96073, time : 12.88924193382263

epoch : 0 [1788/21279] Train loss: 2.16011,Valid loss: 2.72081, time : 12.146829605102539 lr : 0.9702989999999999
epoch : 0 [1789/21279] Train loss: 2.21572,Valid loss: 3.01003, time : 12.496015310287476 lr : 0.9702989999999999
epoch : 0 [1790/21279] Train loss: 2.17877,Valid loss: 2.61786, time : 12.10011339187622 lr : 0.9702989999999999
epoch : 0 [1791/21279] Train loss: 2.14771,Valid loss: 2.58887, time : 11.950153827667236 lr : 0.9702989999999999
epoch : 0 [1792/21279] Train loss: 2.15740,Valid loss: 2.64470, time : 12.035864114761353 lr : 0.9702989999999999
epoch : 0 [1793/21279] Train loss: 2.14703,Valid loss: 2.63343, time : 12.242386102676392 lr : 0.9702989999999999
epoch : 0 [1794/21279] Train loss: 2.15378,Valid loss: 2.78129, time : 12.352553606033325 lr : 0.9702989999999999
epoch : 0 [1795/21279] Train loss: 2.15235,Valid loss: 2.77107, time : 12.470820426940918 lr : 0.9702989999999999
epoch : 0 [1796/21279] Train loss: 2.21751,Valid loss: 3.32570, time : 18.255366563796997

epoch : 0 [1861/21279] Train loss: 2.00551,Valid loss: 2.57861, time : 12.112256050109863 lr : 0.9702989999999999
epoch : 0 [1862/21279] Train loss: 1.97808,Valid loss: 2.60533, time : 12.238425731658936 lr : 0.9702989999999999
epoch : 0 [1863/21279] Train loss: 1.98505,Valid loss: 2.61417, time : 12.211870908737183 lr : 0.9702989999999999
epoch : 0 [1864/21279] Train loss: 1.97466,Valid loss: 2.49281, time : 14.306227207183838 lr : 0.9702989999999999
epoch : 0 [1865/21279] Train loss: 2.02311,Valid loss: 2.60688, time : 12.729799270629883 lr : 0.9702989999999999
epoch : 0 [1866/21279] Train loss: 2.03491,Valid loss: 4.00958, time : 12.584155559539795 lr : 0.9702989999999999
epoch : 0 [1867/21279] Train loss: 2.33011,Valid loss: 2.92266, time : 12.662853956222534 lr : 0.9702989999999999
epoch : 0 [1868/21279] Train loss: 2.40198,Valid loss: 2.91692, time : 12.58060908317566 lr : 0.9702989999999999
epoch : 0 [1869/21279] Train loss: 2.06080,Valid loss: 3.13784, time : 12.65628457069397 

epoch : 0 [1933/21279] Train loss: 1.93214,Valid loss: 3.84265, time : 13.105113506317139 lr : 0.9702989999999999
epoch : 0 [1934/21279] Train loss: 1.88216,Valid loss: 3.24275, time : 14.471104860305786 lr : 0.9702989999999999
epoch : 0 [1935/21279] Train loss: 1.86723,Valid loss: 2.31087, time : 12.977384328842163 lr : 0.9702989999999999
epoch : 0 [1936/21279] Train loss: 1.85113,Valid loss: 2.40141, time : 12.87059998512268 lr : 0.9702989999999999
epoch : 0 [1937/21279] Train loss: 1.84744,Valid loss: 2.23017, time : 12.593381881713867 lr : 0.9702989999999999
epoch : 0 [1938/21279] Train loss: 1.83888,Valid loss: 2.49041, time : 12.847044467926025 lr : 0.9702989999999999
epoch : 0 [1939/21279] Train loss: 1.86429,Valid loss: 3.21522, time : 13.237163543701172 lr : 0.9702989999999999
epoch : 0 [1940/21279] Train loss: 1.89669,Valid loss: 3.25679, time : 13.003263235092163 lr : 0.9702989999999999
epoch : 0 [1941/21279] Train loss: 1.98794,Valid loss: 3.15183, time : 12.969293117523193

epoch : 0 [2006/21279] Train loss: 1.71563,Valid loss: 2.43920, time : 12.529792547225952 lr : 0.96059601
epoch : 0 [2007/21279] Train loss: 1.69849,Valid loss: 2.20748, time : 12.322857141494751 lr : 0.96059601
epoch : 0 [2008/21279] Train loss: 1.72649,Valid loss: 2.48922, time : 12.40959620475769 lr : 0.96059601
epoch : 0 [2009/21279] Train loss: 1.70551,Valid loss: 2.28570, time : 12.87293553352356 lr : 0.96059601
epoch : 0 [2010/21279] Train loss: 1.72271,Valid loss: 2.45901, time : 12.220920324325562 lr : 0.96059601
epoch : 0 [2011/21279] Train loss: 1.68002,Valid loss: 2.10004, time : 12.498003005981445 lr : 0.96059601
epoch : 0 [2012/21279] Train loss: 1.66256,Valid loss: 2.28522, time : 12.660335779190063 lr : 0.96059601
epoch : 0 [2013/21279] Train loss: 1.61954,Valid loss: 2.04198, time : 12.746720790863037 lr : 0.96059601
epoch : 0 [2014/21279] Train loss: 1.62188,Valid loss: 2.28402, time : 13.092591285705566 lr : 0.96059601
epoch : 0 [2015/21279] Train loss: 1.64995,Valid

epoch : 0 [2084/21279] Train loss: 1.69786,Valid loss: 2.21364, time : 12.644892930984497 lr : 0.96059601
epoch : 0 [2085/21279] Train loss: 1.56100,Valid loss: 2.04639, time : 12.648136854171753 lr : 0.96059601
epoch : 0 [2086/21279] Train loss: 1.55930,Valid loss: 2.07083, time : 13.922905206680298 lr : 0.96059601
epoch : 0 [2087/21279] Train loss: 1.54697,Valid loss: 1.98478, time : 11.82938814163208 lr : 0.96059601
epoch : 0 [2088/21279] Train loss: 1.53444,Valid loss: 1.97581, time : 12.200911521911621 lr : 0.96059601
epoch : 0 [2089/21279] Train loss: 1.53693,Valid loss: 1.95460, time : 12.52385663986206 lr : 0.96059601
epoch : 0 [2090/21279] Train loss: 1.51448,Valid loss: 1.93854, time : 11.866459846496582 lr : 0.96059601
epoch : 0 [2091/21279] Train loss: 1.49774,Valid loss: 1.97541, time : 12.432287693023682 lr : 0.96059601
epoch : 0 [2092/21279] Train loss: 1.48390,Valid loss: 2.06499, time : 12.112679243087769 lr : 0.96059601
epoch : 0 [2093/21279] Train loss: 1.48214,Valid

epoch : 0 [2162/21279] Train loss: 1.41653,Valid loss: 1.96477, time : 12.220099687576294 lr : 0.96059601
epoch : 0 [2163/21279] Train loss: 1.46164,Valid loss: 1.77321, time : 12.39795732498169 lr : 0.96059601
epoch : 0 [2164/21279] Train loss: 1.41531,Valid loss: 2.18045, time : 12.092102766036987 lr : 0.96059601
epoch : 0 [2165/21279] Train loss: 1.40718,Valid loss: 1.84963, time : 12.278567552566528 lr : 0.96059601
epoch : 0 [2166/21279] Train loss: 1.43554,Valid loss: 1.78461, time : 12.050926446914673 lr : 0.96059601
epoch : 0 [2167/21279] Train loss: 1.41726,Valid loss: 1.77183, time : 12.599772453308105 lr : 0.96059601
epoch : 0 [2168/21279] Train loss: 1.42204,Valid loss: 1.93757, time : 14.287091493606567 lr : 0.96059601
epoch : 0 [2169/21279] Train loss: 1.42149,Valid loss: 2.00444, time : 12.406437158584595 lr : 0.96059601
epoch : 0 [2170/21279] Train loss: 1.41462,Valid loss: 1.78715, time : 12.292273044586182 lr : 0.96059601
epoch : 0 [2171/21279] Train loss: 1.39732,Vali

epoch : 0 [2240/21279] Train loss: 1.31592,Valid loss: 1.75008, time : 12.840978384017944 lr : 0.96059601
epoch : 0 [2241/21279] Train loss: 1.32700,Valid loss: 1.81359, time : 12.798053503036499 lr : 0.96059601
epoch : 0 [2242/21279] Train loss: 1.34714,Valid loss: 1.65069, time : 12.092742919921875 lr : 0.96059601
epoch : 0 [2243/21279] Train loss: 1.33415,Valid loss: 1.88541, time : 12.423677444458008 lr : 0.96059601
epoch : 0 [2244/21279] Train loss: 1.32256,Valid loss: 1.88627, time : 12.760705947875977 lr : 0.96059601
epoch : 0 [2245/21279] Train loss: 1.33559,Valid loss: 2.08146, time : 12.740485906600952 lr : 0.96059601
epoch : 0 [2246/21279] Train loss: 1.29417,Valid loss: 1.62027, time : 13.212660551071167 lr : 0.96059601
epoch : 0 [2247/21279] Train loss: 1.30614,Valid loss: 1.90831, time : 12.174928665161133 lr : 0.96059601
epoch : 0 [2248/21279] Train loss: 1.31588,Valid loss: 2.49881, time : 12.309623956680298 lr : 0.96059601
epoch : 0 [2249/21279] Train loss: 1.34250,Val

epoch : 0 [2318/21279] Train loss: 1.35281,Valid loss: 2.15385, time : 14.384745121002197 lr : 0.96059601
epoch : 0 [2319/21279] Train loss: 1.35902,Valid loss: 1.83847, time : 12.675448894500732 lr : 0.96059601
epoch : 0 [2320/21279] Train loss: 1.41995,Valid loss: 1.88484, time : 12.721749067306519 lr : 0.96059601
epoch : 0 [2321/21279] Train loss: 1.23367,Valid loss: 1.63409, time : 12.681811571121216 lr : 0.96059601
epoch : 0 [2322/21279] Train loss: 1.18334,Valid loss: 1.68904, time : 12.639795064926147 lr : 0.96059601
epoch : 0 [2323/21279] Train loss: 1.18515,Valid loss: 1.63907, time : 12.656893730163574 lr : 0.96059601
epoch : 0 [2324/21279] Train loss: 1.18458,Valid loss: 1.47131, time : 12.749553680419922 lr : 0.96059601
epoch : 0 [2325/21279] Train loss: 1.17869,Valid loss: 1.58852, time : 12.542418241500854 lr : 0.96059601
epoch : 0 [2326/21279] Train loss: 1.18960,Valid loss: 1.50757, time : 12.349923372268677 lr : 0.96059601
epoch : 0 [2327/21279] Train loss: 1.18814,Val

epoch : 0 [2396/21279] Train loss: 1.31377,Valid loss: 2.80937, time : 12.379296779632568 lr : 0.96059601
epoch : 0 [2397/21279] Train loss: 1.39649,Valid loss: 2.55388, time : 12.593433856964111 lr : 0.96059601
epoch : 0 [2398/21279] Train loss: 1.39768,Valid loss: 2.47103, time : 13.126598119735718 lr : 0.96059601
epoch : 0 [2399/21279] Train loss: 1.54922,Valid loss: 2.02118, time : 12.663979530334473 lr : 0.96059601
epoch : 0 [2400/21279] Train loss: 1.26702,Valid loss: 2.01037, time : 18.862640619277954 lr : 0.96059601
epoch : 0 [2401/21279] Train loss: 1.26882,Valid loss: 3.45751, time : 13.398044109344482 lr : 0.96059601
epoch : 0 [2402/21279] Train loss: 1.43371,Valid loss: 1.95411, time : 13.400291442871094 lr : 0.96059601
epoch : 0 [2403/21279] Train loss: 1.19053,Valid loss: 1.76740, time : 13.360635995864868 lr : 0.96059601
epoch : 0 [2404/21279] Train loss: 1.10881,Valid loss: 1.62981, time : 12.558648586273193 lr : 0.96059601
epoch : 0 [2405/21279] Train loss: 1.11576,Val

epoch : 0 [2474/21279] Train loss: 1.03519,Valid loss: 1.56248, time : 12.55063271522522 lr : 0.96059601
epoch : 0 [2475/21279] Train loss: 1.04575,Valid loss: 1.83672, time : 12.549434661865234 lr : 0.96059601
epoch : 0 [2476/21279] Train loss: 1.26898,Valid loss: 2.66065, time : 12.886574268341064 lr : 0.96059601
epoch : 0 [2477/21279] Train loss: 1.77557,Valid loss: 1.79604, time : 12.574622869491577 lr : 0.96059601
epoch : 0 [2478/21279] Train loss: 1.25074,Valid loss: 2.80492, time : 12.619168519973755 lr : 0.96059601
epoch : 0 [2479/21279] Train loss: 1.99515,Valid loss: 2.36808, time : 12.311544418334961 lr : 0.96059601
epoch : 0 [2480/21279] Train loss: 1.20298,Valid loss: 1.87699, time : 12.627089262008667 lr : 0.96059601
epoch : 0 [2481/21279] Train loss: 1.38230,Valid loss: 2.99648, time : 11.768994331359863 lr : 0.96059601
epoch : 0 [2482/21279] Train loss: 1.64367,Valid loss: 2.39392, time : 14.087724924087524 lr : 0.96059601
epoch : 0 [2483/21279] Train loss: 1.51152,Vali

epoch : 0 [2548/21279] Train loss: 0.95977,Valid loss: 1.37067, time : 12.41135048866272 lr : 0.9509900498999999
epoch : 0 [2549/21279] Train loss: 0.93574,Valid loss: 1.34141, time : 11.959403038024902 lr : 0.9509900498999999
epoch : 0 [2550/21279] Train loss: 0.93866,Valid loss: 1.70707, time : 12.380141258239746 lr : 0.9509900498999999
epoch : 0 [2551/21279] Train loss: 0.95151,Valid loss: 1.41872, time : 12.57797122001648 lr : 0.9509900498999999
epoch : 0 [2552/21279] Train loss: 0.95073,Valid loss: 1.73121, time : 12.773714303970337 lr : 0.9509900498999999
epoch : 0 [2553/21279] Train loss: 0.97553,Valid loss: 1.40776, time : 12.68554973602295 lr : 0.9509900498999999
epoch : 0 [2554/21279] Train loss: 0.95716,Valid loss: 1.22087, time : 14.262469291687012 lr : 0.9509900498999999
epoch : 0 [2555/21279] Train loss: 0.95716,Valid loss: 1.28622, time : 12.927931547164917 lr : 0.9509900498999999
epoch : 0 [2556/21279] Train loss: 0.94533,Valid loss: 1.48884, time : 12.967152118682861 l

epoch : 0 [2621/21279] Train loss: 0.91586,Valid loss: 1.26117, time : 12.346925735473633 lr : 0.9509900498999999
epoch : 0 [2622/21279] Train loss: 0.89861,Valid loss: 1.21834, time : 12.726922512054443 lr : 0.9509900498999999
epoch : 0 [2623/21279] Train loss: 0.92788,Valid loss: 1.52056, time : 12.653240203857422 lr : 0.9509900498999999
epoch : 0 [2624/21279] Train loss: 0.91917,Valid loss: 1.28499, time : 12.389906167984009 lr : 0.9509900498999999
epoch : 0 [2625/21279] Train loss: 0.94026,Valid loss: 1.24179, time : 12.53123950958252 lr : 0.9509900498999999
epoch : 0 [2626/21279] Train loss: 0.95994,Valid loss: 1.85515, time : 12.418903112411499 lr : 0.9509900498999999
epoch : 0 [2627/21279] Train loss: 1.23805,Valid loss: 1.83093, time : 12.84696340560913 lr : 0.9509900498999999
epoch : 0 [2628/21279] Train loss: 1.29892,Valid loss: 1.98099, time : 12.827715396881104 lr : 0.9509900498999999
epoch : 0 [2629/21279] Train loss: 1.24199,Valid loss: 1.66519, time : 12.85149073600769 l

epoch : 0 [2694/21279] Train loss: 1.61849,Valid loss: 3.40239, time : 12.815024852752686 lr : 0.9509900498999999
epoch : 0 [2695/21279] Train loss: 1.32153,Valid loss: 1.83090, time : 13.082966089248657 lr : 0.9509900498999999
epoch : 0 [2696/21279] Train loss: 0.99960,Valid loss: 1.91937, time : 11.366324424743652 lr : 0.9509900498999999
epoch : 0 [2697/21279] Train loss: 1.06602,Valid loss: 1.66228, time : 11.714917659759521 lr : 0.9509900498999999
epoch : 0 [2698/21279] Train loss: 1.07708,Valid loss: 1.72795, time : 12.387201309204102 lr : 0.9509900498999999
epoch : 0 [2699/21279] Train loss: 1.02692,Valid loss: 2.39895, time : 12.751849889755249 lr : 0.9509900498999999
epoch : 0 [2700/21279] Train loss: 1.07242,Valid loss: 2.05176, time : 12.442394733428955 lr : 0.9509900498999999
epoch : 0 [2701/21279] Train loss: 0.95346,Valid loss: 1.56135, time : 12.695178747177124 lr : 0.9509900498999999
epoch : 0 [2702/21279] Train loss: 0.95403,Valid loss: 1.44206, time : 14.87740969657898

epoch : 0 [2766/21279] Train loss: 0.81351,Valid loss: 1.07290, time : 12.17188549041748 lr : 0.9509900498999999
epoch : 0 [2767/21279] Train loss: 0.79328,Valid loss: 1.44940, time : 12.24856448173523 lr : 0.9509900498999999
epoch : 0 [2768/21279] Train loss: 0.82309,Valid loss: 5.64332, time : 12.691197156906128 lr : 0.9509900498999999
epoch : 0 [2769/21279] Train loss: 0.88607,Valid loss: 1.27008, time : 12.973433494567871 lr : 0.9509900498999999
epoch : 0 [2770/21279] Train loss: 0.91631,Valid loss: 1.72349, time : 12.463257789611816 lr : 0.9509900498999999
epoch : 0 [2771/21279] Train loss: 0.80149,Valid loss: 1.38800, time : 12.762433528900146 lr : 0.9509900498999999
epoch : 0 [2772/21279] Train loss: 0.80021,Valid loss: 1.48116, time : 12.679940700531006 lr : 0.9509900498999999
epoch : 0 [2773/21279] Train loss: 0.80179,Valid loss: 1.37692, time : 12.60569143295288 lr : 0.9509900498999999
epoch : 0 [2774/21279] Train loss: 0.81622,Valid loss: 1.31337, time : 14.869816303253174 l

epoch : 0 [2838/21279] Train loss: 0.77537,Valid loss: 1.12601, time : 12.74875283241272 lr : 0.9509900498999999
epoch : 0 [2839/21279] Train loss: 0.74571,Valid loss: 1.25576, time : 12.775610446929932 lr : 0.9509900498999999
epoch : 0 [2840/21279] Train loss: 0.76546,Valid loss: 1.04632, time : 13.82476258277893 lr : 0.9509900498999999
epoch : 0 [2841/21279] Train loss: 0.77664,Valid loss: 1.33196, time : 12.229522466659546 lr : 0.9509900498999999
epoch : 0 [2842/21279] Train loss: 0.77431,Valid loss: 1.15467, time : 12.477795600891113 lr : 0.9509900498999999
epoch : 0 [2843/21279] Train loss: 0.80079,Valid loss: 1.13480, time : 12.561350584030151 lr : 0.9509900498999999
epoch : 0 [2844/21279] Train loss: 0.76283,Valid loss: 1.25399, time : 12.523926973342896 lr : 0.9509900498999999
epoch : 0 [2845/21279] Train loss: 0.78229,Valid loss: 1.53821, time : 12.34049367904663 lr : 0.9509900498999999
epoch : 0 [2846/21279] Train loss: 0.77586,Valid loss: 1.15629, time : 11.936272859573364 l

epoch : 0 [2911/21279] Train loss: 0.78187,Valid loss: 3.01116, time : 12.944546937942505 lr : 0.9509900498999999
epoch : 0 [2912/21279] Train loss: 0.88449,Valid loss: 1.04205, time : 12.642431497573853 lr : 0.9509900498999999
epoch : 0 [2913/21279] Train loss: 0.75296,Valid loss: 1.30310, time : 12.249051809310913 lr : 0.9509900498999999
epoch : 0 [2914/21279] Train loss: 0.74040,Valid loss: 1.32557, time : 12.356626510620117 lr : 0.9509900498999999
epoch : 0 [2915/21279] Train loss: 0.73216,Valid loss: 1.15303, time : 12.265696048736572 lr : 0.9509900498999999
epoch : 0 [2916/21279] Train loss: 0.72251,Valid loss: 1.24573, time : 12.792699813842773 lr : 0.9509900498999999
epoch : 0 [2917/21279] Train loss: 0.72068,Valid loss: 1.05560, time : 13.212721347808838 lr : 0.9509900498999999
epoch : 0 [2918/21279] Train loss: 0.73249,Valid loss: 1.12391, time : 13.19373869895935 lr : 0.9509900498999999
epoch : 0 [2919/21279] Train loss: 0.73874,Valid loss: 1.29113, time : 13.532281160354614

epoch : 0 [2983/21279] Train loss: 0.67947,Valid loss: 1.20148, time : 12.992031574249268 lr : 0.9509900498999999
epoch : 0 [2984/21279] Train loss: 0.66929,Valid loss: 1.09069, time : 12.616580724716187 lr : 0.9509900498999999
epoch : 0 [2985/21279] Train loss: 0.65986,Valid loss: 0.95390, time : 12.79565167427063 lr : 0.9509900498999999
epoch : 0 [2986/21279] Train loss: 0.69309,Valid loss: 1.28855, time : 12.805094480514526 lr : 0.9509900498999999
epoch : 0 [2987/21279] Train loss: 0.68312,Valid loss: 1.13131, time : 13.007441759109497 lr : 0.9509900498999999
epoch : 0 [2988/21279] Train loss: 0.66987,Valid loss: 1.18388, time : 13.366820812225342 lr : 0.9509900498999999
epoch : 0 [2989/21279] Train loss: 0.68264,Valid loss: 1.12487, time : 13.0923752784729 lr : 0.9509900498999999
epoch : 0 [2990/21279] Train loss: 0.71112,Valid loss: 1.06583, time : 13.212641954421997 lr : 0.9509900498999999
epoch : 0 [2991/21279] Train loss: 0.67055,Valid loss: 1.06428, time : 13.048264026641846 l

epoch : 0 [3055/21279] Train loss: 0.64931,Valid loss: 1.06180, time : 12.660974264144897 lr : 0.9414801494009999
epoch : 0 [3056/21279] Train loss: 0.65219,Valid loss: 1.55406, time : 12.815367460250854 lr : 0.9414801494009999
epoch : 0 [3057/21279] Train loss: 0.67988,Valid loss: 1.95447, time : 12.54345965385437 lr : 0.9414801494009999
epoch : 0 [3058/21279] Train loss: 0.88678,Valid loss: 9.23954, time : 12.526455640792847 lr : 0.9414801494009999
epoch : 0 [3059/21279] Train loss: 1.73770,Valid loss: 5.05362, time : 13.069568395614624 lr : 0.9414801494009999
epoch : 0 [3060/21279] Train loss: 1.21096,Valid loss: 5.14366, time : 15.054331302642822 lr : 0.9414801494009999
epoch : 0 [3061/21279] Train loss: 1.54974,Valid loss: 2.47950, time : 13.024780511856079 lr : 0.9414801494009999
epoch : 0 [3062/21279] Train loss: 1.06869,Valid loss: 2.88186, time : 12.71330213546753 lr : 0.9414801494009999
epoch : 0 [3063/21279] Train loss: 1.34448,Valid loss: 1.89562, time : 12.02882194519043 l

epoch : 0 [3127/21279] Train loss: 0.67861,Valid loss: 0.84086, time : 12.629479169845581 lr : 0.9414801494009999
epoch : 0 [3128/21279] Train loss: 0.64419,Valid loss: 1.15917, time : 12.382011651992798 lr : 0.9414801494009999
epoch : 0 [3129/21279] Train loss: 0.63929,Valid loss: 0.97092, time : 12.504655838012695 lr : 0.9414801494009999
epoch : 0 [3130/21279] Train loss: 0.64232,Valid loss: 1.63976, time : 18.93877387046814 lr : 0.9414801494009999
epoch : 0 [3131/21279] Train loss: 0.64341,Valid loss: 1.09058, time : 12.086425304412842 lr : 0.9414801494009999
epoch : 0 [3132/21279] Train loss: 0.64848,Valid loss: 1.18640, time : 12.466018438339233 lr : 0.9414801494009999
epoch : 0 [3133/21279] Train loss: 0.63970,Valid loss: 1.12043, time : 12.45583987236023 lr : 0.9414801494009999
epoch : 0 [3134/21279] Train loss: 0.61603,Valid loss: 1.26846, time : 12.581801414489746 lr : 0.9414801494009999
epoch : 0 [3135/21279] Train loss: 0.64528,Valid loss: 1.10358, time : 12.71087908744812 l

epoch : 0 [3199/21279] Train loss: 0.60235,Valid loss: 1.09699, time : 15.184834241867065 lr : 0.9414801494009999
epoch : 0 [3200/21279] Train loss: 0.68784,Valid loss: 1.40032, time : 12.65035629272461 lr : 0.9414801494009999
epoch : 0 [3201/21279] Train loss: 0.94514,Valid loss: 1.84036, time : 12.510462760925293 lr : 0.9414801494009999
epoch : 0 [3202/21279] Train loss: 0.98788,Valid loss: 1.45070, time : 12.919450283050537 lr : 0.9414801494009999
epoch : 0 [3203/21279] Train loss: 0.85415,Valid loss: 1.47873, time : 12.1730375289917 lr : 0.9414801494009999
epoch : 0 [3204/21279] Train loss: 0.73620,Valid loss: 1.33168, time : 12.711597919464111 lr : 0.9414801494009999
epoch : 0 [3205/21279] Train loss: 0.67553,Valid loss: 1.04419, time : 12.691314935684204 lr : 0.9414801494009999
epoch : 0 [3206/21279] Train loss: 0.62164,Valid loss: 0.88458, time : 12.333355188369751 lr : 0.9414801494009999
epoch : 0 [3207/21279] Train loss: 0.61618,Valid loss: 0.86434, time : 12.379780292510986 l

epoch : 0 [3272/21279] Train loss: 0.58012,Valid loss: 1.13800, time : 11.860382080078125 lr : 0.9414801494009999
epoch : 0 [3273/21279] Train loss: 0.57372,Valid loss: 0.95853, time : 12.229009628295898 lr : 0.9414801494009999
epoch : 0 [3274/21279] Train loss: 0.57348,Valid loss: 1.07193, time : 12.114859819412231 lr : 0.9414801494009999
epoch : 0 [3275/21279] Train loss: 0.55094,Valid loss: 1.01000, time : 12.71428918838501 lr : 0.9414801494009999
epoch : 0 [3276/21279] Train loss: 0.56495,Valid loss: 0.93259, time : 12.047304630279541 lr : 0.9414801494009999
epoch : 0 [3277/21279] Train loss: 0.57072,Valid loss: 0.93440, time : 13.06447696685791 lr : 0.9414801494009999
epoch : 0 [3278/21279] Train loss: 0.55910,Valid loss: 1.00946, time : 12.592250108718872 lr : 0.9414801494009999
epoch : 0 [3279/21279] Train loss: 0.55786,Valid loss: 0.98696, time : 13.155513048171997 lr : 0.9414801494009999
epoch : 0 [3280/21279] Train loss: 0.55636,Valid loss: 0.99237, time : 14.078559398651123 

epoch : 0 [3344/21279] Train loss: 0.56510,Valid loss: 0.88773, time : 11.905102729797363 lr : 0.9414801494009999
epoch : 0 [3345/21279] Train loss: 0.56621,Valid loss: 0.80053, time : 11.455352067947388 lr : 0.9414801494009999
epoch : 0 [3346/21279] Train loss: 0.55122,Valid loss: 0.66040, time : 12.209579467773438 lr : 0.9414801494009999
epoch : 0 [3347/21279] Train loss: 0.53274,Valid loss: 0.81810, time : 11.49461030960083 lr : 0.9414801494009999
epoch : 0 [3348/21279] Train loss: 0.55822,Valid loss: 0.91604, time : 11.800683975219727 lr : 0.9414801494009999
epoch : 0 [3349/21279] Train loss: 0.54425,Valid loss: 0.76539, time : 11.472675561904907 lr : 0.9414801494009999
epoch : 0 [3350/21279] Train loss: 0.54850,Valid loss: 0.94379, time : 14.32838225364685 lr : 0.9414801494009999
epoch : 0 [3351/21279] Train loss: 0.53923,Valid loss: 1.11671, time : 11.986329555511475 lr : 0.9414801494009999
epoch : 0 [3352/21279] Train loss: 0.53707,Valid loss: 0.86522, time : 12.083474397659302 

epoch : 0 [3417/21279] Train loss: 0.53803,Valid loss: 0.76584, time : 11.87417721748352 lr : 0.9414801494009999
epoch : 0 [3418/21279] Train loss: 0.50999,Valid loss: 0.65473, time : 12.17015266418457 lr : 0.9414801494009999
epoch : 0 [3419/21279] Train loss: 0.49606,Valid loss: 0.88420, time : 14.749756336212158 lr : 0.9414801494009999
epoch : 0 [3420/21279] Train loss: 0.52504,Valid loss: 0.78787, time : 11.933556079864502 lr : 0.9414801494009999
epoch : 0 [3421/21279] Train loss: 0.52273,Valid loss: 0.66227, time : 12.288264036178589 lr : 0.9414801494009999
epoch : 0 [3422/21279] Train loss: 0.50519,Valid loss: 0.97443, time : 12.04464077949524 lr : 0.9414801494009999
epoch : 0 [3423/21279] Train loss: 0.52160,Valid loss: 0.99596, time : 11.869939804077148 lr : 0.9414801494009999
epoch : 0 [3424/21279] Train loss: 0.52926,Valid loss: 1.00702, time : 11.929860591888428 lr : 0.9414801494009999
epoch : 0 [3425/21279] Train loss: 0.53790,Valid loss: 0.94483, time : 11.596354961395264 l

epoch : 0 [3489/21279] Train loss: 0.53546,Valid loss: 0.96043, time : 12.014861822128296 lr : 0.9414801494009999
epoch : 0 [3490/21279] Train loss: 0.51107,Valid loss: 1.11985, time : 12.557540893554688 lr : 0.9414801494009999
epoch : 0 [3491/21279] Train loss: 0.50391,Valid loss: 0.98713, time : 13.057382345199585 lr : 0.9414801494009999
epoch : 0 [3492/21279] Train loss: 0.51656,Valid loss: 1.07648, time : 12.436765193939209 lr : 0.9414801494009999
epoch : 0 [3493/21279] Train loss: 0.52037,Valid loss: 0.70336, time : 12.56402587890625 lr : 0.9414801494009999
epoch : 0 [3494/21279] Train loss: 0.50926,Valid loss: 0.72611, time : 12.742666482925415 lr : 0.9414801494009999
epoch : 0 [3495/21279] Train loss: 0.49980,Valid loss: 0.72893, time : 12.30281138420105 lr : 0.9414801494009999
epoch : 0 [3496/21279] Train loss: 0.50177,Valid loss: 0.71307, time : 11.826518535614014 lr : 0.9414801494009999
epoch : 0 [3497/21279] Train loss: 0.48949,Valid loss: 0.88470, time : 12.21642518043518 l

epoch : 0 [3562/21279] Train loss: 0.50001,Valid loss: 0.83681, time : 12.891029119491577 lr : 0.9320653479069899
epoch : 0 [3563/21279] Train loss: 0.49847,Valid loss: 0.72730, time : 13.238446235656738 lr : 0.9320653479069899
epoch : 0 [3564/21279] Train loss: 0.50002,Valid loss: 0.64291, time : 12.534640789031982 lr : 0.9320653479069899
epoch : 0 [3565/21279] Train loss: 0.47236,Valid loss: 0.71842, time : 12.99548602104187 lr : 0.9320653479069899
epoch : 0 [3566/21279] Train loss: 0.47355,Valid loss: 0.69079, time : 12.833388805389404 lr : 0.9320653479069899
epoch : 0 [3567/21279] Train loss: 0.47209,Valid loss: 0.83341, time : 12.865448713302612 lr : 0.9320653479069899
epoch : 0 [3568/21279] Train loss: 0.47364,Valid loss: 0.69425, time : 12.093527555465698 lr : 0.9320653479069899
epoch : 0 [3569/21279] Train loss: 0.48669,Valid loss: 0.89564, time : 12.394728183746338 lr : 0.9320653479069899
epoch : 0 [3570/21279] Train loss: 0.49315,Valid loss: 0.93661, time : 14.896553754806519

epoch : 0 [3634/21279] Train loss: 0.49137,Valid loss: 0.70922, time : 12.84322476387024 lr : 0.9320653479069899
epoch : 0 [3635/21279] Train loss: 0.48430,Valid loss: 0.78089, time : 13.01398754119873 lr : 0.9320653479069899
epoch : 0 [3636/21279] Train loss: 0.47222,Valid loss: 0.72394, time : 12.231327772140503 lr : 0.9320653479069899
epoch : 0 [3637/21279] Train loss: 0.46127,Valid loss: 0.71713, time : 12.263696908950806 lr : 0.9320653479069899
epoch : 0 [3638/21279] Train loss: 0.45974,Valid loss: 0.70883, time : 12.203615427017212 lr : 0.9320653479069899
epoch : 0 [3639/21279] Train loss: 0.45821,Valid loss: 0.73889, time : 12.357778072357178 lr : 0.9320653479069899
epoch : 0 [3640/21279] Train loss: 0.47646,Valid loss: 0.91719, time : 14.1509690284729 lr : 0.9320653479069899
epoch : 0 [3641/21279] Train loss: 0.47020,Valid loss: 0.68058, time : 12.327698469161987 lr : 0.9320653479069899
epoch : 0 [3642/21279] Train loss: 0.47716,Valid loss: 0.59317, time : 12.77692699432373 lr 

epoch : 0 [3707/21279] Train loss: 0.44139,Valid loss: 0.68073, time : 13.037774801254272 lr : 0.9320653479069899
epoch : 0 [3708/21279] Train loss: 0.45025,Valid loss: 0.58006, time : 14.575669288635254 lr : 0.9320653479069899
epoch : 0 [3709/21279] Train loss: 0.44028,Valid loss: 0.81596, time : 13.000950336456299 lr : 0.9320653479069899
epoch : 0 [3710/21279] Train loss: 0.44944,Valid loss: 0.67920, time : 12.918215990066528 lr : 0.9320653479069899
epoch : 0 [3711/21279] Train loss: 0.46228,Valid loss: 0.87552, time : 12.980267524719238 lr : 0.9320653479069899
epoch : 0 [3712/21279] Train loss: 0.44846,Valid loss: 0.61077, time : 12.966789960861206 lr : 0.9320653479069899
epoch : 0 [3713/21279] Train loss: 0.47191,Valid loss: 1.06346, time : 13.187034606933594 lr : 0.9320653479069899
epoch : 0 [3714/21279] Train loss: 0.43759,Valid loss: 0.82266, time : 12.078485250473022 lr : 0.9320653479069899
epoch : 0 [3715/21279] Train loss: 0.43210,Valid loss: 0.67690, time : 12.38076663017273

epoch : 0 [3779/21279] Train loss: 0.44940,Valid loss: 0.90091, time : 12.883806228637695 lr : 0.9320653479069899
epoch : 0 [3780/21279] Train loss: 0.44736,Valid loss: 0.82808, time : 12.915379524230957 lr : 0.9320653479069899
epoch : 0 [3781/21279] Train loss: 0.43016,Valid loss: 0.62057, time : 13.110613346099854 lr : 0.9320653479069899
epoch : 0 [3782/21279] Train loss: 0.44783,Valid loss: 0.66264, time : 12.605066299438477 lr : 0.9320653479069899
epoch : 0 [3783/21279] Train loss: 0.45264,Valid loss: 0.69007, time : 13.04783296585083 lr : 0.9320653479069899
epoch : 0 [3784/21279] Train loss: 0.43491,Valid loss: 0.60999, time : 12.330165386199951 lr : 0.9320653479069899
epoch : 0 [3785/21279] Train loss: 0.43916,Valid loss: 0.58925, time : 12.643773794174194 lr : 0.9320653479069899
epoch : 0 [3786/21279] Train loss: 0.42403,Valid loss: 0.63388, time : 12.82482385635376 lr : 0.9320653479069899
epoch : 0 [3787/21279] Train loss: 0.42719,Valid loss: 1.14321, time : 12.009418487548828 

epoch : 0 [3851/21279] Train loss: 0.40875,Valid loss: 0.60723, time : 13.176460266113281 lr : 0.9320653479069899
epoch : 0 [3852/21279] Train loss: 0.42292,Valid loss: 0.58623, time : 12.737140417098999 lr : 0.9320653479069899
epoch : 0 [3853/21279] Train loss: 0.41342,Valid loss: 0.57356, time : 12.971648216247559 lr : 0.9320653479069899
epoch : 0 [3854/21279] Train loss: 0.39331,Valid loss: 0.79112, time : 12.888566255569458 lr : 0.9320653479069899
epoch : 0 [3855/21279] Train loss: 0.40491,Valid loss: 0.57162, time : 12.85514235496521 lr : 0.9320653479069899
epoch : 0 [3856/21279] Train loss: 0.41398,Valid loss: 0.89544, time : 12.837145566940308 lr : 0.9320653479069899
epoch : 0 [3857/21279] Train loss: 0.43388,Valid loss: 0.74362, time : 12.941643238067627 lr : 0.9320653479069899
epoch : 0 [3858/21279] Train loss: 0.42359,Valid loss: 0.66353, time : 14.84425973892212 lr : 0.9320653479069899
epoch : 0 [3859/21279] Train loss: 0.45382,Valid loss: 0.66940, time : 12.313734769821167 

epoch : 0 [3924/21279] Train loss: 0.42190,Valid loss: 0.55621, time : 12.652858972549438 lr : 0.9320653479069899
epoch : 0 [3925/21279] Train loss: 0.40387,Valid loss: 0.55014, time : 11.880833625793457 lr : 0.9320653479069899
epoch : 0 [3926/21279] Train loss: 0.41315,Valid loss: 0.56203, time : 12.063462734222412 lr : 0.9320653479069899
epoch : 0 [3927/21279] Train loss: 0.40823,Valid loss: 0.64636, time : 12.562098264694214 lr : 0.9320653479069899
epoch : 0 [3928/21279] Train loss: 0.41365,Valid loss: 0.88856, time : 14.691197633743286 lr : 0.9320653479069899
epoch : 0 [3929/21279] Train loss: 0.44680,Valid loss: 0.56395, time : 12.487646579742432 lr : 0.9320653479069899
epoch : 0 [3930/21279] Train loss: 0.42589,Valid loss: 1.32158, time : 12.907156467437744 lr : 0.9320653479069899
epoch : 0 [3931/21279] Train loss: 0.42360,Valid loss: 0.53652, time : 12.035483837127686 lr : 0.9320653479069899
epoch : 0 [3932/21279] Train loss: 0.39486,Valid loss: 0.59491, time : 12.31707334518432

epoch : 0 [3996/21279] Train loss: 0.44073,Valid loss: 1.99707, time : 13.227159261703491 lr : 0.9320653479069899
epoch : 0 [3997/21279] Train loss: 0.56687,Valid loss: 2.06724, time : 13.185062646865845 lr : 0.9320653479069899
epoch : 0 [3998/21279] Train loss: 0.50901,Valid loss: 0.76202, time : 14.743987798690796 lr : 0.9320653479069899
epoch : 0 [3999/21279] Train loss: 0.43386,Valid loss: 0.61674, time : 12.651503562927246 lr : 0.92274469442792
epoch : 0 [4000/21279] Train loss: 0.43022,Valid loss: 0.73043, time : 12.651089668273926 lr : 0.92274469442792
epoch : 0 [4001/21279] Train loss: 0.41525,Valid loss: 0.86781, time : 12.708543539047241 lr : 0.92274469442792
epoch : 0 [4002/21279] Train loss: 0.39624,Valid loss: 0.70795, time : 12.902281761169434 lr : 0.92274469442792
epoch : 0 [4003/21279] Train loss: 0.39589,Valid loss: 0.58215, time : 12.968499660491943 lr : 0.92274469442792
epoch : 0 [4004/21279] Train loss: 0.39435,Valid loss: 0.63538, time : 12.881847858428955 lr : 0.9

epoch : 0 [4070/21279] Train loss: 0.37530,Valid loss: 0.48323, time : 12.644337892532349 lr : 0.92274469442792
epoch : 0 [4071/21279] Train loss: 0.37206,Valid loss: 1.24688, time : 12.277207851409912 lr : 0.92274469442792
epoch : 0 [4072/21279] Train loss: 0.49564,Valid loss: 0.51212, time : 12.557345628738403 lr : 0.92274469442792
epoch : 0 [4073/21279] Train loss: 0.40921,Valid loss: 1.63116, time : 12.999062538146973 lr : 0.92274469442792
epoch : 0 [4074/21279] Train loss: 0.45418,Valid loss: 0.80467, time : 13.095121145248413 lr : 0.92274469442792
epoch : 0 [4075/21279] Train loss: 0.40448,Valid loss: 0.98831, time : 12.76815938949585 lr : 0.92274469442792
epoch : 0 [4076/21279] Train loss: 0.42011,Valid loss: 0.76282, time : 12.648575067520142 lr : 0.92274469442792
epoch : 0 [4077/21279] Train loss: 0.40656,Valid loss: 1.68353, time : 12.59807014465332 lr : 0.92274469442792
epoch : 0 [4078/21279] Train loss: 0.45239,Valid loss: 1.17853, time : 12.808742046356201 lr : 0.922744694

epoch : 0 [4144/21279] Train loss: 0.37304,Valid loss: 0.48142, time : 12.646729230880737 lr : 0.92274469442792
epoch : 0 [4145/21279] Train loss: 0.36715,Valid loss: 0.73355, time : 12.943174839019775 lr : 0.92274469442792
epoch : 0 [4146/21279] Train loss: 0.34434,Valid loss: 0.70478, time : 13.412962436676025 lr : 0.92274469442792
epoch : 0 [4147/21279] Train loss: 0.36740,Valid loss: 0.71864, time : 12.864818811416626 lr : 0.92274469442792
epoch : 0 [4148/21279] Train loss: 0.35060,Valid loss: 0.83462, time : 14.919934511184692 lr : 0.92274469442792
epoch : 0 [4149/21279] Train loss: 0.36964,Valid loss: 0.49293, time : 12.69154691696167 lr : 0.92274469442792
epoch : 0 [4150/21279] Train loss: 0.37300,Valid loss: 0.48691, time : 13.243719339370728 lr : 0.92274469442792
epoch : 0 [4151/21279] Train loss: 0.34743,Valid loss: 0.52164, time : 12.582675695419312 lr : 0.92274469442792
epoch : 0 [4152/21279] Train loss: 0.35659,Valid loss: 0.54770, time : 12.888686418533325 lr : 0.92274469

epoch : 0 [4218/21279] Train loss: 0.36634,Valid loss: 0.49764, time : 16.117268562316895 lr : 0.92274469442792
epoch : 0 [4219/21279] Train loss: 0.36484,Valid loss: 0.57863, time : 13.367289304733276 lr : 0.92274469442792
epoch : 0 [4220/21279] Train loss: 0.35666,Valid loss: 0.66248, time : 13.099080562591553 lr : 0.92274469442792
epoch : 0 [4221/21279] Train loss: 0.35910,Valid loss: 0.80136, time : 13.068159103393555 lr : 0.92274469442792
epoch : 0 [4222/21279] Train loss: 0.35504,Valid loss: 0.57919, time : 13.212485313415527 lr : 0.92274469442792
epoch : 0 [4223/21279] Train loss: 0.34615,Valid loss: 0.57620, time : 13.166695594787598 lr : 0.92274469442792
epoch : 0 [4224/21279] Train loss: 0.36671,Valid loss: 0.86354, time : 13.164437770843506 lr : 0.92274469442792
epoch : 0 [4225/21279] Train loss: 0.32085,Valid loss: 0.54444, time : 12.85994029045105 lr : 0.92274469442792
epoch : 0 [4226/21279] Train loss: 0.34947,Valid loss: 0.49413, time : 12.881272554397583 lr : 0.92274469

epoch : 0 [4292/21279] Train loss: 0.36565,Valid loss: 0.71965, time : 12.72761583328247 lr : 0.92274469442792
epoch : 0 [4293/21279] Train loss: 0.36071,Valid loss: 0.88897, time : 12.277389287948608 lr : 0.92274469442792
epoch : 0 [4294/21279] Train loss: 0.34799,Valid loss: 0.80893, time : 12.239670753479004 lr : 0.92274469442792
epoch : 0 [4295/21279] Train loss: 0.34116,Valid loss: 0.77608, time : 12.64586853981018 lr : 0.92274469442792
epoch : 0 [4296/21279] Train loss: 0.33438,Valid loss: 0.44534, time : 12.650506973266602 lr : 0.92274469442792
epoch : 0 [4297/21279] Train loss: 0.32265,Valid loss: 0.60829, time : 12.679170370101929 lr : 0.92274469442792
epoch : 0 [4298/21279] Train loss: 0.35499,Valid loss: 0.60752, time : 11.993967294692993 lr : 0.92274469442792
epoch : 0 [4299/21279] Train loss: 0.32995,Valid loss: 0.90242, time : 12.247474908828735 lr : 0.92274469442792
epoch : 0 [4300/21279] Train loss: 0.33669,Valid loss: 0.43627, time : 13.46817946434021 lr : 0.9227446944

epoch : 0 [4366/21279] Train loss: 0.40117,Valid loss: 0.54206, time : 12.16736888885498 lr : 0.92274469442792
epoch : 0 [4367/21279] Train loss: 0.38166,Valid loss: 0.67102, time : 11.948765516281128 lr : 0.92274469442792
epoch : 0 [4368/21279] Train loss: 0.37783,Valid loss: 0.50494, time : 14.615635395050049 lr : 0.92274469442792
epoch : 0 [4369/21279] Train loss: 0.37982,Valid loss: 0.66802, time : 12.145346641540527 lr : 0.92274469442792
epoch : 0 [4370/21279] Train loss: 0.36063,Valid loss: 0.48677, time : 12.274199962615967 lr : 0.92274469442792
epoch : 0 [4371/21279] Train loss: 0.35381,Valid loss: 0.47221, time : 11.93342638015747 lr : 0.92274469442792
epoch : 0 [4372/21279] Train loss: 0.35544,Valid loss: 0.71784, time : 11.919773817062378 lr : 0.92274469442792
epoch : 0 [4373/21279] Train loss: 0.34740,Valid loss: 0.44560, time : 12.284388065338135 lr : 0.92274469442792
epoch : 0 [4374/21279] Train loss: 0.33470,Valid loss: 0.46982, time : 12.359545469284058 lr : 0.922744694

epoch : 0 [4440/21279] Train loss: 0.33232,Valid loss: 0.53986, time : 12.38451886177063 lr : 0.92274469442792
epoch : 0 [4441/21279] Train loss: 0.34139,Valid loss: 0.44153, time : 12.816766738891602 lr : 0.92274469442792
epoch : 0 [4442/21279] Train loss: 0.33513,Valid loss: 0.42786, time : 13.019991159439087 lr : 0.92274469442792
epoch : 0 [4443/21279] Train loss: 0.35591,Valid loss: 0.43964, time : 12.939472675323486 lr : 0.92274469442792
epoch : 0 [4444/21279] Train loss: 0.34314,Valid loss: 0.43445, time : 11.786344289779663 lr : 0.92274469442792
epoch : 0 [4445/21279] Train loss: 0.32382,Valid loss: 0.44429, time : 11.71757173538208 lr : 0.92274469442792
epoch : 0 [4446/21279] Train loss: 0.32211,Valid loss: 0.41865, time : 11.68004322052002 lr : 0.92274469442792
epoch : 0 [4447/21279] Train loss: 0.30576,Valid loss: 0.42791, time : 12.462132453918457 lr : 0.92274469442792
epoch : 0 [4448/21279] Train loss: 0.32393,Valid loss: 0.42697, time : 11.737127780914307 lr : 0.9227446944

epoch : 0 [4514/21279] Train loss: 0.32381,Valid loss: 0.48963, time : 12.909282684326172 lr : 0.9135172474836407
epoch : 0 [4515/21279] Train loss: 0.32923,Valid loss: 0.43989, time : 12.754148244857788 lr : 0.9135172474836407
epoch : 0 [4516/21279] Train loss: 0.32185,Valid loss: 0.44834, time : 12.60048532485962 lr : 0.9135172474836407
epoch : 0 [4517/21279] Train loss: 0.30960,Valid loss: 0.44999, time : 12.378232717514038 lr : 0.9135172474836407
epoch : 0 [4518/21279] Train loss: 0.30717,Valid loss: 0.42802, time : 12.332618951797485 lr : 0.9135172474836407
epoch : 0 [4519/21279] Train loss: 0.31358,Valid loss: 0.43476, time : 12.158812284469604 lr : 0.9135172474836407
epoch : 0 [4520/21279] Train loss: 0.32764,Valid loss: 0.44079, time : 14.347561120986938 lr : 0.9135172474836407
epoch : 0 [4521/21279] Train loss: 0.32863,Valid loss: 0.45197, time : 12.405471086502075 lr : 0.9135172474836407
epoch : 0 [4522/21279] Train loss: 0.32145,Valid loss: 0.42695, time : 12.433594226837158

epoch : 0 [4586/21279] Train loss: 0.31336,Valid loss: 0.43619, time : 12.099249601364136 lr : 0.9135172474836407
epoch : 0 [4587/21279] Train loss: 0.30870,Valid loss: 0.42245, time : 12.341465473175049 lr : 0.9135172474836407
epoch : 0 [4588/21279] Train loss: 0.30988,Valid loss: 0.44546, time : 14.727178573608398 lr : 0.9135172474836407
epoch : 0 [4589/21279] Train loss: 0.31090,Valid loss: 0.41015, time : 12.57773470878601 lr : 0.9135172474836407
epoch : 0 [4590/21279] Train loss: 0.31019,Valid loss: 0.43505, time : 12.297872066497803 lr : 0.9135172474836407
epoch : 0 [4591/21279] Train loss: 0.31028,Valid loss: 0.42125, time : 12.137362480163574 lr : 0.9135172474836407
epoch : 0 [4592/21279] Train loss: 0.30807,Valid loss: 0.43888, time : 12.570972204208374 lr : 0.9135172474836407
epoch : 0 [4593/21279] Train loss: 0.29866,Valid loss: 0.41136, time : 12.276866436004639 lr : 0.9135172474836407
epoch : 0 [4594/21279] Train loss: 0.29058,Valid loss: 0.43637, time : 12.635673761367798

epoch : 0 [4658/21279] Train loss: 0.30057,Valid loss: 0.43472, time : 16.514805555343628 lr : 0.9135172474836407
epoch : 0 [4659/21279] Train loss: 0.30898,Valid loss: 0.41536, time : 12.350493431091309 lr : 0.9135172474836407
epoch : 0 [4660/21279] Train loss: 0.30375,Valid loss: 0.41072, time : 12.574106216430664 lr : 0.9135172474836407
epoch : 0 [4661/21279] Train loss: 0.28734,Valid loss: 0.44582, time : 12.242886066436768 lr : 0.9135172474836407
epoch : 0 [4662/21279] Train loss: 0.30703,Valid loss: 0.42922, time : 12.603748798370361 lr : 0.9135172474836407
epoch : 0 [4663/21279] Train loss: 0.29602,Valid loss: 0.40906, time : 12.530096769332886 lr : 0.9135172474836407
epoch : 0 [4664/21279] Train loss: 0.32379,Valid loss: 0.44156, time : 12.537536382675171 lr : 0.9135172474836407
epoch : 0 [4665/21279] Train loss: 0.28566,Valid loss: 0.67335, time : 12.892012119293213 lr : 0.9135172474836407
epoch : 0 [4666/21279] Train loss: 0.28535,Valid loss: 0.45786, time : 12.61403989791870

epoch : 0 [4730/21279] Train loss: 1.03789,Valid loss: 2.44868, time : 12.152060508728027 lr : 0.9135172474836407
epoch : 0 [4731/21279] Train loss: 0.58494,Valid loss: 0.96175, time : 12.69749402999878 lr : 0.9135172474836407
epoch : 0 [4732/21279] Train loss: 0.51447,Valid loss: 1.28423, time : 12.268080711364746 lr : 0.9135172474836407
epoch : 0 [4733/21279] Train loss: 0.52522,Valid loss: 2.06048, time : 12.004526376724243 lr : 0.9135172474836407
epoch : 0 [4734/21279] Train loss: 1.00594,Valid loss: 2.97503, time : 12.450256824493408 lr : 0.9135172474836407
epoch : 0 [4735/21279] Train loss: 0.72229,Valid loss: 1.18758, time : 12.081337928771973 lr : 0.9135172474836407
epoch : 0 [4736/21279] Train loss: 0.44114,Valid loss: 0.57405, time : 12.47498869895935 lr : 0.9135172474836407
epoch : 0 [4737/21279] Train loss: 0.34968,Valid loss: 1.11414, time : 12.2597177028656 lr : 0.9135172474836407
epoch : 0 [4738/21279] Train loss: 0.47114,Valid loss: 0.72405, time : 12.556892156600952 lr

epoch : 0 [4802/21279] Train loss: 0.27068,Valid loss: 0.41965, time : 12.19822883605957 lr : 0.9135172474836407
epoch : 0 [4803/21279] Train loss: 0.28531,Valid loss: 0.78771, time : 12.235310554504395 lr : 0.9135172474836407
epoch : 0 [4804/21279] Train loss: 0.28338,Valid loss: 0.63800, time : 12.325988531112671 lr : 0.9135172474836407
epoch : 0 [4805/21279] Train loss: 0.30091,Valid loss: 0.53130, time : 12.32691478729248 lr : 0.9135172474836407
epoch : 0 [4806/21279] Train loss: 0.28507,Valid loss: 0.86162, time : 12.506841897964478 lr : 0.9135172474836407
epoch : 0 [4807/21279] Train loss: 0.30247,Valid loss: 0.63336, time : 12.106737613677979 lr : 0.9135172474836407
epoch : 0 [4808/21279] Train loss: 0.29978,Valid loss: 0.38244, time : 14.037492036819458 lr : 0.9135172474836407
epoch : 0 [4809/21279] Train loss: 0.28632,Valid loss: 0.38622, time : 12.436872959136963 lr : 0.9135172474836407
epoch : 0 [4810/21279] Train loss: 0.27644,Valid loss: 0.39145, time : 11.784549236297607 

epoch : 0 [4874/21279] Train loss: 0.26690,Valid loss: 0.38857, time : 11.497272253036499 lr : 0.9135172474836407
epoch : 0 [4875/21279] Train loss: 0.27681,Valid loss: 0.38187, time : 12.152610063552856 lr : 0.9135172474836407
epoch : 0 [4876/21279] Train loss: 0.26729,Valid loss: 0.44347, time : 11.862439155578613 lr : 0.9135172474836407
epoch : 0 [4877/21279] Train loss: 0.30076,Valid loss: 0.39131, time : 12.196329116821289 lr : 0.9135172474836407
epoch : 0 [4878/21279] Train loss: 0.27336,Valid loss: 0.38035, time : 14.02847409248352 lr : 0.9135172474836407
epoch : 0 [4879/21279] Train loss: 0.27225,Valid loss: 0.40575, time : 12.207598209381104 lr : 0.9135172474836407
epoch : 0 [4880/21279] Train loss: 0.26128,Valid loss: 0.49446, time : 12.427424430847168 lr : 0.9135172474836407
epoch : 0 [4881/21279] Train loss: 0.27923,Valid loss: 0.37756, time : 12.829784154891968 lr : 0.9135172474836407
epoch : 0 [4882/21279] Train loss: 0.27307,Valid loss: 0.37526, time : 12.486396312713623

epoch : 0 [4946/21279] Train loss: 0.26536,Valid loss: 0.35469, time : 12.406588792800903 lr : 0.9135172474836407
epoch : 0 [4947/21279] Train loss: 0.29565,Valid loss: 0.37280, time : 12.218810558319092 lr : 0.9135172474836407
epoch : 0 [4948/21279] Train loss: 0.27923,Valid loss: 0.39709, time : 12.049067974090576 lr : 0.9135172474836407
epoch : 0 [4949/21279] Train loss: 0.26698,Valid loss: 0.39177, time : 12.025582790374756 lr : 0.9135172474836407
epoch : 0 [4950/21279] Train loss: 0.27606,Valid loss: 0.40147, time : 12.045378923416138 lr : 0.9135172474836407
epoch : 0 [4951/21279] Train loss: 0.27093,Valid loss: 0.37327, time : 12.381351470947266 lr : 0.9135172474836407
epoch : 0 [4952/21279] Train loss: 0.28305,Valid loss: 0.38851, time : 13.08086085319519 lr : 0.9135172474836407
epoch : 0 [4953/21279] Train loss: 0.26134,Valid loss: 0.39618, time : 12.425501346588135 lr : 0.9135172474836407
epoch : 0 [4954/21279] Train loss: 0.25648,Valid loss: 0.38643, time : 12.624325037002563

epoch : 0 [5018/21279] Train loss: 0.28076,Valid loss: 0.37252, time : 12.493096113204956 lr : 0.9043820750088043
epoch : 0 [5019/21279] Train loss: 0.27256,Valid loss: 0.36575, time : 12.276081562042236 lr : 0.9043820750088043
epoch : 0 [5020/21279] Train loss: 0.24803,Valid loss: 0.36493, time : 12.751763582229614 lr : 0.9043820750088043
epoch : 0 [5021/21279] Train loss: 0.26816,Valid loss: 0.36086, time : 12.491014003753662 lr : 0.9043820750088043
epoch : 0 [5022/21279] Train loss: 0.25731,Valid loss: 0.41014, time : 12.612837791442871 lr : 0.9043820750088043
epoch : 0 [5023/21279] Train loss: 0.29831,Valid loss: 0.97740, time : 12.431851625442505 lr : 0.9043820750088043
epoch : 0 [5024/21279] Train loss: 0.40087,Valid loss: 4.59002, time : 12.12025260925293 lr : 0.9043820750088043
epoch : 0 [5025/21279] Train loss: 1.13917,Valid loss: 0.72446, time : 11.928860902786255 lr : 0.9043820750088043
epoch : 0 [5026/21279] Train loss: 0.46607,Valid loss: 0.65310, time : 11.830943584442139

epoch : 0 [5091/21279] Train loss: 0.25822,Valid loss: 0.34497, time : 11.926701545715332 lr : 0.9043820750088043
epoch : 0 [5092/21279] Train loss: 0.26731,Valid loss: 0.35922, time : 11.747545719146729 lr : 0.9043820750088043
epoch : 0 [5093/21279] Train loss: 0.25916,Valid loss: 0.34920, time : 12.55351996421814 lr : 0.9043820750088043
epoch : 0 [5094/21279] Train loss: 0.26152,Valid loss: 0.34827, time : 12.433408737182617 lr : 0.9043820750088043
epoch : 0 [5095/21279] Train loss: 0.23407,Valid loss: 0.35815, time : 11.6055428981781 lr : 0.9043820750088043
epoch : 0 [5096/21279] Train loss: 0.24401,Valid loss: 0.35619, time : 11.828759908676147 lr : 0.9043820750088043
epoch : 0 [5097/21279] Train loss: 0.26580,Valid loss: 0.36557, time : 12.048454523086548 lr : 0.9043820750088043
epoch : 0 [5098/21279] Train loss: 0.26219,Valid loss: 0.34649, time : 13.157980918884277 lr : 0.9043820750088043
epoch : 0 [5099/21279] Train loss: 0.25431,Valid loss: 0.34672, time : 12.086944103240967 l

epoch : 0 [5164/21279] Train loss: 0.26986,Valid loss: 0.51049, time : 14.709912776947021 lr : 0.9043820750088043
epoch : 0 [5165/21279] Train loss: 0.25946,Valid loss: 0.62576, time : 12.247596740722656 lr : 0.9043820750088043
epoch : 0 [5166/21279] Train loss: 0.25293,Valid loss: 0.34835, time : 11.618419885635376 lr : 0.9043820750088043
epoch : 0 [5167/21279] Train loss: 0.24879,Valid loss: 0.49972, time : 12.25499939918518 lr : 0.9043820750088043
epoch : 0 [5168/21279] Train loss: 0.24588,Valid loss: 0.36825, time : 11.574268579483032 lr : 0.9043820750088043
epoch : 0 [5169/21279] Train loss: 0.26261,Valid loss: 0.50563, time : 12.34014344215393 lr : 0.9043820750088043
epoch : 0 [5170/21279] Train loss: 0.26699,Valid loss: 0.60108, time : 12.069054126739502 lr : 0.9043820750088043
epoch : 0 [5171/21279] Train loss: 0.26127,Valid loss: 0.49535, time : 12.089983463287354 lr : 0.9043820750088043
epoch : 0 [5172/21279] Train loss: 0.27126,Valid loss: 0.50922, time : 12.17328953742981 l

epoch : 0 [5237/21279] Train loss: 0.28459,Valid loss: 0.59963, time : 12.646450757980347 lr : 0.9043820750088043
epoch : 0 [5238/21279] Train loss: 0.24415,Valid loss: 0.31661, time : 13.341537475585938 lr : 0.9043820750088043
epoch : 0 [5239/21279] Train loss: 0.25331,Valid loss: 0.47864, time : 12.64839482307434 lr : 0.9043820750088043
epoch : 0 [5240/21279] Train loss: 0.26378,Valid loss: 1.10206, time : 13.05915117263794 lr : 0.9043820750088043
epoch : 0 [5241/21279] Train loss: 0.27445,Valid loss: 0.33794, time : 12.950210809707642 lr : 0.9043820750088043
epoch : 0 [5242/21279] Train loss: 0.25359,Valid loss: 0.71388, time : 12.870055198669434 lr : 0.9043820750088043
epoch : 0 [5243/21279] Train loss: 0.25691,Valid loss: 0.33869, time : 12.416577816009521 lr : 0.9043820750088043
epoch : 0 [5244/21279] Train loss: 0.24863,Valid loss: 0.33336, time : 12.646918535232544 lr : 0.9043820750088043
epoch : 0 [5245/21279] Train loss: 0.24921,Valid loss: 0.31448, time : 12.540914297103882 

epoch : 0 [5310/21279] Train loss: 0.30238,Valid loss: 0.58339, time : 11.024847030639648 lr : 0.9043820750088043
epoch : 0 [5311/21279] Train loss: 0.28622,Valid loss: 0.39677, time : 11.909515380859375 lr : 0.9043820750088043
epoch : 0 [5312/21279] Train loss: 0.24095,Valid loss: 0.39583, time : 12.46169924736023 lr : 0.9043820750088043
epoch : 0 [5313/21279] Train loss: 0.25741,Valid loss: 0.35534, time : 11.840412855148315 lr : 0.9043820750088043
epoch : 0 [5314/21279] Train loss: 0.24972,Valid loss: 0.33745, time : 11.735843181610107 lr : 0.9043820750088043
epoch : 0 [5315/21279] Train loss: 0.23655,Valid loss: 0.32086, time : 12.269699811935425 lr : 0.9043820750088043
epoch : 0 [5316/21279] Train loss: 0.22073,Valid loss: 0.34433, time : 12.187319040298462 lr : 0.9043820750088043
epoch : 0 [5317/21279] Train loss: 0.23720,Valid loss: 0.33764, time : 12.234829902648926 lr : 0.9043820750088043
epoch : 0 [5318/21279] Train loss: 0.23009,Valid loss: 0.32609, time : 15.494567394256592

epoch : 0 [5383/21279] Train loss: 0.24294,Valid loss: 0.61969, time : 13.361079454421997 lr : 0.9043820750088043
epoch : 0 [5384/21279] Train loss: 0.24670,Valid loss: 0.60902, time : 13.059024572372437 lr : 0.9043820750088043
epoch : 0 [5385/21279] Train loss: 0.24698,Valid loss: 0.48567, time : 12.490860223770142 lr : 0.9043820750088043
epoch : 0 [5386/21279] Train loss: 0.23870,Valid loss: 0.32283, time : 12.59480333328247 lr : 0.9043820750088043
epoch : 0 [5387/21279] Train loss: 0.24335,Valid loss: 0.45036, time : 11.97079062461853 lr : 0.9043820750088043
epoch : 0 [5388/21279] Train loss: 0.22016,Valid loss: 0.46321, time : 14.307236671447754 lr : 0.9043820750088043
epoch : 0 [5389/21279] Train loss: 0.23296,Valid loss: 0.46202, time : 11.871758937835693 lr : 0.9043820750088043
epoch : 0 [5390/21279] Train loss: 0.24720,Valid loss: 0.53313, time : 12.107011795043945 lr : 0.9043820750088043
epoch : 0 [5391/21279] Train loss: 0.24244,Valid loss: 0.47319, time : 12.362403392791748 

epoch : 0 [5455/21279] Train loss: 0.22498,Valid loss: 0.44875, time : 12.21569013595581 lr : 0.9043820750088043
epoch : 0 [5456/21279] Train loss: 0.24799,Valid loss: 0.31804, time : 11.904781103134155 lr : 0.9043820750088043
epoch : 0 [5457/21279] Train loss: 0.24130,Valid loss: 0.31637, time : 12.043524980545044 lr : 0.9043820750088043
epoch : 0 [5458/21279] Train loss: 0.22953,Valid loss: 0.30702, time : 12.21846055984497 lr : 0.9043820750088043
epoch : 0 [5459/21279] Train loss: 0.22487,Valid loss: 0.31091, time : 12.50338339805603 lr : 0.9043820750088043
epoch : 0 [5460/21279] Train loss: 0.22035,Valid loss: 0.30724, time : 12.296929597854614 lr : 0.9043820750088043
epoch : 0 [5461/21279] Train loss: 0.24632,Valid loss: 0.45185, time : 12.49800729751587 lr : 0.9043820750088043
epoch : 0 [5462/21279] Train loss: 0.21594,Valid loss: 0.30802, time : 12.329179286956787 lr : 0.9043820750088043
epoch : 0 [5463/21279] Train loss: 0.21640,Valid loss: 0.29221, time : 12.368950366973877 lr

epoch : 0 [5528/21279] Train loss: 0.21877,Valid loss: 0.33296, time : 12.450769662857056 lr : 0.8953382542587163
epoch : 0 [5529/21279] Train loss: 0.23103,Valid loss: 0.33403, time : 13.085484266281128 lr : 0.8953382542587163
epoch : 0 [5530/21279] Train loss: 0.20810,Valid loss: 0.30895, time : 12.460151433944702 lr : 0.8953382542587163
epoch : 0 [5531/21279] Train loss: 0.21889,Valid loss: 0.29418, time : 12.536157369613647 lr : 0.8953382542587163
epoch : 0 [5532/21279] Train loss: 0.21610,Valid loss: 0.32263, time : 12.716541290283203 lr : 0.8953382542587163
epoch : 0 [5533/21279] Train loss: 0.22300,Valid loss: 0.55137, time : 12.757593154907227 lr : 0.8953382542587163
epoch : 0 [5534/21279] Train loss: 0.24147,Valid loss: 0.44479, time : 12.7045259475708 lr : 0.8953382542587163
epoch : 0 [5535/21279] Train loss: 0.22148,Valid loss: 0.29127, time : 12.658130168914795 lr : 0.8953382542587163
epoch : 0 [5536/21279] Train loss: 0.21058,Valid loss: 0.31821, time : 16.87203359603882 l

epoch : 0 [5600/21279] Train loss: 0.24261,Valid loss: 0.42988, time : 12.160428524017334 lr : 0.8953382542587163
epoch : 0 [5601/21279] Train loss: 0.23613,Valid loss: 0.30278, time : 12.229682445526123 lr : 0.8953382542587163
epoch : 0 [5602/21279] Train loss: 0.21441,Valid loss: 0.28970, time : 11.63080620765686 lr : 0.8953382542587163
epoch : 0 [5603/21279] Train loss: 0.20002,Valid loss: 0.27773, time : 12.504825115203857 lr : 0.8953382542587163
epoch : 0 [5604/21279] Train loss: 0.21764,Valid loss: 0.27639, time : 11.7261483669281 lr : 0.8953382542587163
epoch : 0 [5605/21279] Train loss: 0.22615,Valid loss: 0.28622, time : 14.312845945358276 lr : 0.8953382542587163
epoch : 0 [5606/21279] Train loss: 0.22179,Valid loss: 0.29388, time : 12.036719799041748 lr : 0.8953382542587163
epoch : 0 [5607/21279] Train loss: 0.21163,Valid loss: 0.82036, time : 12.271899700164795 lr : 0.8953382542587163
epoch : 0 [5608/21279] Train loss: 0.20784,Valid loss: 0.48038, time : 12.347226858139038 l

epoch : 0 [5673/21279] Train loss: 0.22205,Valid loss: 0.28608, time : 12.170054912567139 lr : 0.8953382542587163
epoch : 0 [5674/21279] Train loss: 0.20647,Valid loss: 0.27604, time : 13.699732780456543 lr : 0.8953382542587163
epoch : 0 [5675/21279] Train loss: 0.22306,Valid loss: 0.31096, time : 12.726357221603394 lr : 0.8953382542587163
epoch : 0 [5676/21279] Train loss: 0.20738,Valid loss: 0.28744, time : 12.30353045463562 lr : 0.8953382542587163
epoch : 0 [5677/21279] Train loss: 0.21255,Valid loss: 0.28683, time : 12.429491758346558 lr : 0.8953382542587163
epoch : 0 [5678/21279] Train loss: 0.21070,Valid loss: 0.31239, time : 12.474599361419678 lr : 0.8953382542587163
epoch : 0 [5679/21279] Train loss: 0.21493,Valid loss: 0.33248, time : 12.247869491577148 lr : 0.8953382542587163
epoch : 0 [5680/21279] Train loss: 0.22456,Valid loss: 0.29835, time : 12.07025694847107 lr : 0.8953382542587163
epoch : 0 [5681/21279] Train loss: 0.22435,Valid loss: 0.29926, time : 12.48981523513794 l

epoch : 0 [5745/21279] Train loss: 0.20042,Valid loss: 0.26565, time : 12.812299489974976 lr : 0.8953382542587163
epoch : 0 [5746/21279] Train loss: 0.21109,Valid loss: 0.28404, time : 12.632340908050537 lr : 0.8953382542587163
epoch : 0 [5747/21279] Train loss: 0.21360,Valid loss: 0.30092, time : 12.77607536315918 lr : 0.8953382542587163
epoch : 0 [5748/21279] Train loss: 0.20780,Valid loss: 0.27961, time : 12.706265687942505 lr : 0.8953382542587163
epoch : 0 [5749/21279] Train loss: 0.19618,Valid loss: 0.50700, time : 12.10851788520813 lr : 0.8953382542587163
epoch : 0 [5750/21279] Train loss: 0.20924,Valid loss: 0.26229, time : 12.66460919380188 lr : 0.8953382542587163
epoch : 0 [5751/21279] Train loss: 0.20023,Valid loss: 0.28819, time : 12.356524467468262 lr : 0.8953382542587163
epoch : 0 [5752/21279] Train loss: 0.21915,Valid loss: 0.27375, time : 12.457216262817383 lr : 0.8953382542587163
epoch : 0 [5753/21279] Train loss: 0.21611,Valid loss: 0.26849, time : 12.350821018218994 l

epoch : 0 [5817/21279] Train loss: 0.20457,Valid loss: 0.27608, time : 12.268569707870483 lr : 0.8953382542587163
epoch : 0 [5818/21279] Train loss: 0.19058,Valid loss: 0.29013, time : 12.626589059829712 lr : 0.8953382542587163
epoch : 0 [5819/21279] Train loss: 0.20909,Valid loss: 0.30437, time : 12.7677743434906 lr : 0.8953382542587163
epoch : 0 [5820/21279] Train loss: 0.21906,Valid loss: 0.44539, time : 12.74200701713562 lr : 0.8953382542587163
epoch : 0 [5821/21279] Train loss: 0.22806,Valid loss: 0.29947, time : 12.152782440185547 lr : 0.8953382542587163
epoch : 0 [5822/21279] Train loss: 0.21881,Valid loss: 0.28120, time : 12.520474433898926 lr : 0.8953382542587163
epoch : 0 [5823/21279] Train loss: 0.21229,Valid loss: 0.26289, time : 11.902531623840332 lr : 0.8953382542587163
epoch : 0 [5824/21279] Train loss: 0.20370,Valid loss: 0.27121, time : 12.136374235153198 lr : 0.8953382542587163
epoch : 0 [5825/21279] Train loss: 0.19395,Valid loss: 0.25957, time : 13.79031252861023 lr

epoch : 0 [5889/21279] Train loss: 0.20391,Valid loss: 0.40911, time : 12.89437484741211 lr : 0.8953382542587163
epoch : 0 [5890/21279] Train loss: 0.20539,Valid loss: 0.38606, time : 12.614549160003662 lr : 0.8953382542587163
epoch : 0 [5891/21279] Train loss: 0.18838,Valid loss: 0.27212, time : 12.497590780258179 lr : 0.8953382542587163
epoch : 0 [5892/21279] Train loss: 0.20392,Valid loss: 0.39152, time : 12.578194856643677 lr : 0.8953382542587163
epoch : 0 [5893/21279] Train loss: 0.18276,Valid loss: 0.40823, time : 12.042886972427368 lr : 0.8953382542587163
epoch : 0 [5894/21279] Train loss: 0.20000,Valid loss: 0.25349, time : 14.587239980697632 lr : 0.8953382542587163
epoch : 0 [5895/21279] Train loss: 0.19594,Valid loss: 0.24752, time : 11.964415550231934 lr : 0.8953382542587163
epoch : 0 [5896/21279] Train loss: 0.18982,Valid loss: 0.26685, time : 12.658157348632812 lr : 0.8953382542587163
epoch : 0 [5897/21279] Train loss: 0.19930,Valid loss: 0.26629, time : 12.615726947784424

epoch : 0 [5962/21279] Train loss: 0.19101,Valid loss: 0.40601, time : 14.787677526473999 lr : 0.8953382542587163
epoch : 0 [5963/21279] Train loss: 0.19433,Valid loss: 0.24702, time : 13.20696759223938 lr : 0.8953382542587163
epoch : 0 [5964/21279] Train loss: 0.19933,Valid loss: 0.40147, time : 12.317876815795898 lr : 0.8953382542587163
epoch : 0 [5965/21279] Train loss: 0.19946,Valid loss: 0.39868, time : 12.730507612228394 lr : 0.8953382542587163
epoch : 0 [5966/21279] Train loss: 0.19549,Valid loss: 0.25638, time : 12.53849983215332 lr : 0.8953382542587163
epoch : 0 [5967/21279] Train loss: 0.19857,Valid loss: 0.26556, time : 12.189539670944214 lr : 0.8953382542587163
epoch : 0 [5968/21279] Train loss: 0.19151,Valid loss: 0.24858, time : 12.256743669509888 lr : 0.8953382542587163
epoch : 0 [5969/21279] Train loss: 0.18191,Valid loss: 0.24263, time : 12.298686265945435 lr : 0.8953382542587163
epoch : 0 [5970/21279] Train loss: 0.19150,Valid loss: 0.25890, time : 12.255898714065552 

epoch : 0 [6034/21279] Train loss: 0.18020,Valid loss: 0.25932, time : 12.759763956069946 lr : 0.8863848717161291
epoch : 0 [6035/21279] Train loss: 0.18265,Valid loss: 0.54856, time : 12.852579355239868 lr : 0.8863848717161291
epoch : 0 [6036/21279] Train loss: 0.21853,Valid loss: 0.41743, time : 12.51162075996399 lr : 0.8863848717161291
epoch : 0 [6037/21279] Train loss: 0.20316,Valid loss: 0.26839, time : 12.961869239807129 lr : 0.8863848717161291
epoch : 0 [6038/21279] Train loss: 0.20569,Valid loss: 0.25659, time : 12.830506086349487 lr : 0.8863848717161291
epoch : 0 [6039/21279] Train loss: 0.18651,Valid loss: 0.24955, time : 12.02243423461914 lr : 0.8863848717161291
epoch : 0 [6040/21279] Train loss: 0.19352,Valid loss: 0.24115, time : 12.268963098526001 lr : 0.8863848717161291
epoch : 0 [6041/21279] Train loss: 0.19244,Valid loss: 0.24254, time : 11.791554689407349 lr : 0.8863848717161291
epoch : 0 [6042/21279] Train loss: 0.18658,Valid loss: 0.25323, time : 12.103257656097412 

epoch : 0 [6107/21279] Train loss: 0.19950,Valid loss: 0.30430, time : 12.23923921585083 lr : 0.8863848717161291
epoch : 0 [6108/21279] Train loss: 0.20249,Valid loss: 0.29241, time : 12.592525005340576 lr : 0.8863848717161291
epoch : 0 [6109/21279] Train loss: 0.19237,Valid loss: 0.28250, time : 12.494659423828125 lr : 0.8863848717161291
epoch : 0 [6110/21279] Train loss: 0.19421,Valid loss: 0.30057, time : 13.240429639816284 lr : 0.8863848717161291
epoch : 0 [6111/21279] Train loss: 0.21260,Valid loss: 0.28675, time : 12.545653104782104 lr : 0.8863848717161291
epoch : 0 [6112/21279] Train loss: 0.19327,Valid loss: 0.29317, time : 12.99588942527771 lr : 0.8863848717161291
epoch : 0 [6113/21279] Train loss: 0.20656,Valid loss: 0.28381, time : 12.006412744522095 lr : 0.8863848717161291
epoch : 0 [6114/21279] Train loss: 0.19331,Valid loss: 0.27870, time : 14.705554485321045 lr : 0.8863848717161291
epoch : 0 [6115/21279] Train loss: 0.19237,Valid loss: 0.31022, time : 12.361741065979004 

epoch : 0 [6179/21279] Train loss: 0.17470,Valid loss: 0.26534, time : 12.637146234512329 lr : 0.8863848717161291
epoch : 0 [6180/21279] Train loss: 0.20235,Valid loss: 0.46128, time : 12.657594919204712 lr : 0.8863848717161291
epoch : 0 [6181/21279] Train loss: 0.19002,Valid loss: 0.24877, time : 12.825918912887573 lr : 0.8863848717161291
epoch : 0 [6182/21279] Train loss: 0.19870,Valid loss: 0.23773, time : 14.763952255249023 lr : 0.8863848717161291
epoch : 0 [6183/21279] Train loss: 0.19331,Valid loss: 0.23074, time : 12.682078838348389 lr : 0.8863848717161291
epoch : 0 [6184/21279] Train loss: 0.17929,Valid loss: 0.24034, time : 11.847204208374023 lr : 0.8863848717161291
epoch : 0 [6185/21279] Train loss: 0.18225,Valid loss: 0.24645, time : 12.433811902999878 lr : 0.8863848717161291
epoch : 0 [6186/21279] Train loss: 0.18125,Valid loss: 0.24995, time : 12.593262672424316 lr : 0.8863848717161291
epoch : 0 [6187/21279] Train loss: 0.18723,Valid loss: 0.23450, time : 12.80388832092285

epoch : 0 [6251/21279] Train loss: 0.19983,Valid loss: 0.23777, time : 12.071133375167847 lr : 0.8863848717161291
epoch : 0 [6252/21279] Train loss: 0.17838,Valid loss: 0.24696, time : 14.266223907470703 lr : 0.8863848717161291
epoch : 0 [6253/21279] Train loss: 0.17976,Valid loss: 0.23451, time : 12.356256008148193 lr : 0.8863848717161291
epoch : 0 [6254/21279] Train loss: 0.19147,Valid loss: 0.24594, time : 12.26571536064148 lr : 0.8863848717161291
epoch : 0 [6255/21279] Train loss: 0.18011,Valid loss: 0.24755, time : 11.820903301239014 lr : 0.8863848717161291
epoch : 0 [6256/21279] Train loss: 0.19618,Valid loss: 0.25778, time : 12.339102268218994 lr : 0.8863848717161291
epoch : 0 [6257/21279] Train loss: 0.16436,Valid loss: 0.23133, time : 12.536713123321533 lr : 0.8863848717161291
epoch : 0 [6258/21279] Train loss: 0.18741,Valid loss: 0.23271, time : 12.745091438293457 lr : 0.8863848717161291
epoch : 0 [6259/21279] Train loss: 0.16688,Valid loss: 0.23915, time : 12.449487447738647