In [1]:
# Copyright (c) Xi Chen
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Borrowed from https://github.com/neocxi/pixelsnail-public and ported it to PyTorch

In [2]:
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_LAUNCH_BLOCKING"] = "0"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [3]:
import gc

import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

from vq_vae_2 import Model
from transformer import VQVAETransformer
from lr_scheduler import WarmupLinearLRSchedule
from torchvision import utils as vutils
from utils import plot_images

from transformers.optimization import Adafactor, AdafactorSchedule
import wandb

In [4]:
wandb.init()
wandb.run.name = 'transformer2'
wandb.run.save()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mqja1998[0m. Use [1m`wandb login --relogin`[0m to force relogin




True

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
class MelData(Dataset):
    def __init__(self, file_path):
        self.data = []
        genre = ['classical', 'rock', 'electronic', 'pop']
        
        for g in genre:
            for i in range(1, 101):
                for j in range(5):
                    tmp_path = f'{file_path}/{g}/{i}-{j}.csv'
                    try:
                        self.data.append((pd.read_csv(tmp_path), g, i, j))
                    except FileNotFoundError:
                        print(f"{g}-{i}-{j} file is deleted")
                        continue
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        mel, g, i, j = self.data[idx]
        mel = torch.from_numpy(pd.get_dummies(mel).values)
        mel = mel.type(torch.cuda.FloatTensor)
        
        return (g, i, j), mel

class EmotionalData(Dataset):
    def __init__(self, file_path):
        self.data = pd.read_csv(file_path)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        tmp_data = self.data.iloc[idx]
        genre, idx = tmp_data[0].split('_')
        emo = tmp_data[1:]
        return idx, genre, torch.FloatTensor(emo)

In [7]:
batch_size = 32

In [8]:
EMO_PATH = "./mean_data.csv"
MEL_ARR_PATH = "./split_mel_array"
SAVE_PATH = "./save_models"
    
mel_arr_data = MelData(MEL_ARR_PATH)
#emo_data = EmotionalData(EMO_PATH)


classical-16-0 file is deleted
classical-16-1 file is deleted
classical-16-2 file is deleted
classical-16-3 file is deleted
classical-16-4 file is deleted
classical-40-0 file is deleted
classical-40-1 file is deleted
classical-40-2 file is deleted
classical-40-3 file is deleted
classical-40-4 file is deleted
classical-57-0 file is deleted
classical-57-1 file is deleted
classical-57-2 file is deleted
classical-57-3 file is deleted
classical-57-4 file is deleted
classical-66-0 file is deleted
classical-66-1 file is deleted
classical-66-2 file is deleted
classical-66-3 file is deleted
classical-66-4 file is deleted
classical-73-0 file is deleted
classical-73-1 file is deleted
classical-73-2 file is deleted
classical-73-3 file is deleted
classical-73-4 file is deleted


In [9]:
mel_arr_data_loader = DataLoader(
        dataset=mel_arr_data, batch_size=batch_size)

#emo_data_loader = DataLoader(
#        dataset=emo_data, batch_size=batch_size, shuffle=True)

In [10]:
def scaled(x):
    return x + 80.0
def unscaled(x):
    return x - 80.0

In [11]:
# extract idices
# torch.Size([32, 10, 128]) torch.Size([32, 20, 256])
def extract_indice(mel_data, model):
    with torch.no_grad():
        for _, mel in mel_data:
            x = scaled(mel)
            x = x[:, :, :-4].reshape(-1, 1, 320, 256).to(device)
            _, _, _, _, id_t, id_b = model.encode(x)
            try:
                ids_t = torch.cat([ids_t, id_t], dim=0)
                ids_b = torch.cat([ids_b, id_b], dim=0)
            except Exception as e:
                print(e)
                ids_t, ids_b = id_t.clone().detach(), id_b.clone().detach()
    return ids_t, ids_b
            

In [12]:
num_hiddens = 128 #128
num_residual_hiddens = 32
num_residual_layers = 4
embedding_dim = 64 #64
num_embeddings = 512 #512
commitment_cost = 0.25

In [13]:
vqvae = Model(num_hiddens=num_hiddens, 
                  num_residual_layers=num_residual_layers,
                  num_residual_hiddens=num_residual_hiddens,
                  num_embeddings=num_embeddings,
                  embedding_dim=embedding_dim, 
                  commitment_cost=commitment_cost).to(device)

score = 119.51515197753906
MODEL_PATH = f'{SAVE_PATH}/vqvae2_light-{score:.5f}_dict.pt'
vqvae.load_state_dict(torch.load(MODEL_PATH))

<All keys matched successfully>

In [14]:
ids_t, ids_b = extract_indice(mel_arr_data_loader, vqvae)
print(ids_t.size(), ids_b.size())

local variable 'ids_t' referenced before assignment
torch.Size([1975, 20, 16]) torch.Size([1975, 40, 32])


In [15]:
import importlib, transformer
importlib.reload(transformer)
from transformer import VQVAETransformer

In [16]:
class TrainTransformer:
    def __init__(self, args, data, lev):
        self.model = VQVAETransformer(args).to(device=args.device)
        self.optim = self.configure_optimizers()
        self.lr_schedule = WarmupLinearLRSchedule(
            optimizer=self.optim,
            init_lr=1e-7,
            peak_lr=args.learning_rate,
            end_lr=0.,
            warmup_epochs=10,
            epochs=args.epochs,
            current_step=args.start_from_epoch
        )
        #self.lr_schedule = AdafactorSchedule(self.optim, initial_lr=1e-2)

        if args.start_from_epoch > 1:
            self.model.load_checkpoint(args.start_from_epoch)
            print(f"Loaded Transformer from epoch {args.start_from_epoch}.")
        
        wandb.watch(self.model)
        self.lev = lev
        self.train(args, data)

    def train(self, args, data):
        train_dataset = data
        len_train_dataset = len(train_dataset)
        step = args.start_from_epoch * len_train_dataset
        for epoch in range(args.start_from_epoch+1, args.epochs+1):
            print(f"Epoch {epoch}:")
            with tqdm(range(len(train_dataset))) as pbar:
                for i, (ids_t, ids_b) in zip(pbar, train_dataset):
                    if self.lev == 'top':
                        imgs = ids_t
                    elif self.lev == 'bot':
                        imgs = ids_b
                    imgs = imgs.to(device=args.device)
                    logits, target = self.model(imgs)
                    loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), target.reshape(-1)).requires_grad_(True)
                    loss.backward()
                    if step % args.accum_grad == 0:
                        self.optim.step()
                        self.optim.zero_grad()
                    step += 1
                    pbar.set_postfix(Transformer_Loss=np.round(loss.cpu().detach().numpy().item(), 4))
                    pbar.update(0)
                    
                    pbar.set_description(
                    (
                        f" lr {self.optim.param_groups[0]['lr']:.6f}\t"
                    )
                    )
                    wandb.log({
                        "Loss": loss,
                        "Learning rate": self.optim.param_groups[0]['lr']
                    })
                self.lr_schedule.step()
            try:
                log, sampled_imgs = self.model.log_images(imgs[0:1])
                vutils.save_image(sampled_imgs.add(1).mul(0.5), os.path.join("results", f"{epoch}.jpg"), nrow=4)
                plot_images(log)
            except:
                pass
            #if epoch % args.ckpt_interval == 0:
            #    torch.save(self.model.state_dict(), os.path.join("checkpoints", f"transformer_epoch_{epoch}.pt"))
            torch.save(self.model.state_dict(), os.path.join("checkpoints", "transformer_current_bot.pt"))

    def configure_optimizers(self):
        # decay, no_decay = set(), set()
        # whitelist_weight_modules = (nn.Linear,)
        # blacklist_weight_modules = (nn.LayerNorm, nn.Embedding)
        # for mn, m in self.model.transformer.named_modules():
        #     for pn, p in m.named_parameters():
        #         fpn = '%s.%s' % (mn, pn) if mn else pn  # full param name
        #
        #         if pn.endswith('bias'):
        #             no_decay.add(fpn)
        #
        #         elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules):
        #             decay.add(fpn)
        #
        #         elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
        #             no_decay.add(fpn)
        #
        # # no_decay.add('pos_emb')
        #
        # param_dict = {pn: p for pn, p in self.model.transformer.named_parameters()}
        #
        # optim_groups = [
        #     {"params": [param_dict[pn] for pn in sorted(list(decay))], "weight_decay": 4.5e-2},
        #     {"params": [param_dict[pn] for pn in sorted(list(no_decay))], "weight_decay": 0.0},
        # ]
        optimizer = torch.optim.Adam(self.model.transformer.parameters(), lr=1e-7, betas=(0.9, 0.96), weight_decay=4.5e-2)
        #optimizer = Adafactor(self.model.parameters(), lr=0.0, scale_parameter=True, relative_step=False)
        #optimizer = Adafactor(self.model.parameters(), scale_parameter=True, relative_step=True, warmup_init=True)
        return optimizer


In [None]:
torch.save(TrainTransformer.model.state_dict(), os.path.join("checkpoints", "transformer_current_bot.pt"))

In [17]:
import argparse
parser = argparse.ArgumentParser(description="VQGAN")
parser.add_argument('--run-name', type=str, default=None)
parser.add_argument('--latent-dim', type=int, default=32, help='Latent dimension n_z.')
parser.add_argument('--image-size', type=int, default=256, help='Image height and width.)')
parser.add_argument('--num-codebook-vectors', type=int, default=8192, help='Number of codebook vectors.')
parser.add_argument('--beta', type=float, default=0.25, help='Commitment loss scalar.')
parser.add_argument('--image-channels', type=int, default=3, help='Number of channels of images.')
parser.add_argument('--dataset-path', type=str, default='./data', help='Path to data.')
parser.add_argument('--checkpoint-path', type=str, default='./checkpoints/last_ckpt.pt', help='Path to checkpoint.')
parser.add_argument('--device', type=str, default="cuda", help='Which device the training is on.')
parser.add_argument('--batch-size', type=int, default=10, help='Batch size for training.')
parser.add_argument('--accum-grad', type=int, default=10, help='Number for gradient accumulation.')
parser.add_argument('--epochs', type=int, default=300, help='Number of epochs to train.')
parser.add_argument('--start-from-epoch', type=int, default=1, help='Number of epochs to train.')
parser.add_argument('--ckpt-interval', type=int, default=100, help='Number of epochs to train.')
parser.add_argument('--learning-rate', type=float, default=1e-4, help='Learning rate.')

parser.add_argument('--sos-token', type=int, default=1025, help='Start of Sentence token.')

parser.add_argument('--n-layers', type=int, default=24, help='Number of layers of transformer.')
parser.add_argument('--dim', type=int, default=768, help='Dimension of transformer.')
parser.add_argument('--hidden-dim', type=int, default=3072, help='Dimension of transformer.')
parser.add_argument('--num-image-tokens', type=int, default=256, help='Number of image tokens.')

args = parser.parse_args(args=[])
args.run_name = "<name>"
args.dataset_path = r"C:\Users\dome\datasets\landscape"
args.checkpoint_path = r".\checkpoints"
args.n_layers = 12
args.dim = 512
args.hidden_dim = 3072
args.batch_size = 4
args.accum_grad = 25
args.epochs = 1000

args.start_from_epoch = 0

args.num_codebook_vectors = 512
args.num_image_tokens = 40 * 32

wandb.config.update(args)

In [18]:
class IDsData(Dataset):
    def __init__(self, ids_t, ids_b):
        self.ids_t = ids_t
        self.ids_b = ids_b
        
    def __len__(self):
        return len(self.ids_t)
    
    def __getitem__(self, idx):
        return self.ids_t[idx], self.ids_b[idx]

In [19]:
length = len(ids_t)
ids_data = IDsData(ids_t.view(length, -1), ids_b.view(length, -1))

In [20]:
batch = 4
ids_loader = DataLoader(
        ids_data, batch_size=batch, shuffle=True, drop_last=False
    )

In [21]:
train_transformer = TrainTransformer(args, ids_loader, lev='bot')

Initializing Module Embedding.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Module Linear.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Module Linear.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Module Linear.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Module Linear.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Module Linear.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Module Linear.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Module Linear.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Module Linear.
Initializing Module NonDynamicallyQuantizableLinear.
Initializing Module Linear.
Initializing Mod

 lr 0.000000	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=6.35]


Epoch 2:


 lr 0.000000	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=6.33]


Epoch 3:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=5.57]


Epoch 4:


 lr 0.000020	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.1] 


Epoch 5:


 lr 0.000030	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.64]


Epoch 6:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.89]


Epoch 7:


 lr 0.000050	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.699]


Epoch 8:


 lr 0.000060	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.03] 


Epoch 9:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.774]


Epoch 10:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.01] 


Epoch 11:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.152]


Epoch 12:


 lr 0.000100	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.57]  


Epoch 13:


 lr 0.000100	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.89]  


Epoch 14:


 lr 0.000100	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.585] 


Epoch 15:


 lr 0.000100	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.04]  


Epoch 16:


 lr 0.000100	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.394] 


Epoch 17:


 lr 0.000099	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.11]  


Epoch 18:


 lr 0.000099	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.294] 


Epoch 19:


 lr 0.000099	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.786] 


Epoch 20:


 lr 0.000099	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.36]  


Epoch 21:


 lr 0.000099	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.408] 


Epoch 22:


 lr 0.000099	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.19]  


Epoch 23:


 lr 0.000099	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.09]  


Epoch 24:


 lr 0.000099	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.05]  


Epoch 25:


 lr 0.000099	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.25]  


Epoch 26:


 lr 0.000099	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.309] 


Epoch 27:


 lr 0.000098	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.92]  


Epoch 28:


 lr 0.000098	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.75]  


Epoch 29:


 lr 0.000098	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.37]  


Epoch 30:


 lr 0.000098	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.13]  


Epoch 31:


 lr 0.000098	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.18]  


Epoch 32:


 lr 0.000098	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.651] 


Epoch 33:


 lr 0.000098	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.566] 


Epoch 34:


 lr 0.000098	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0533]


Epoch 35:


 lr 0.000098	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.803] 


Epoch 36:


 lr 0.000098	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.07]  


Epoch 37:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.71]  


Epoch 38:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0621]


Epoch 39:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.26]  


Epoch 40:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.15]  


Epoch 41:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.192] 


Epoch 42:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.32]  


Epoch 43:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.901] 


Epoch 44:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.21]  


Epoch 45:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.95]  


Epoch 46:


 lr 0.000097	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.811] 


Epoch 47:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0281]


Epoch 48:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.758] 


Epoch 49:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.42]  


Epoch 50:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.46]  


Epoch 51:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.62]  


Epoch 52:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.491] 


Epoch 53:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.39]  


Epoch 54:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.77]  


Epoch 55:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.98]  


Epoch 56:


 lr 0.000096	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.317] 


Epoch 57:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.62]  


Epoch 58:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.42]  


Epoch 59:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.21]  


Epoch 60:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.99]  


Epoch 61:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.824] 


Epoch 62:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.15]  


Epoch 63:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0383]


Epoch 64:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.159] 


Epoch 65:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.21]  


Epoch 66:


 lr 0.000095	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.848] 


Epoch 67:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.39]  


Epoch 68:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0841]


Epoch 69:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.186] 


Epoch 70:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0431]


Epoch 71:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.97]  


Epoch 72:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.705] 


Epoch 73:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.32]  


Epoch 74:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.683] 


Epoch 75:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.79]  


Epoch 76:


 lr 0.000094	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.67]  


Epoch 77:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.45]  


Epoch 78:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0531]


Epoch 79:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.5]   


Epoch 80:


 lr 0.000093	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.0845]


Epoch 81:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0643]


Epoch 82:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=4.19]  


Epoch 83:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.756] 


Epoch 84:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0833]


Epoch 85:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.03]  


Epoch 86:


 lr 0.000093	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.653] 


Epoch 87:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.88]  


Epoch 88:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.637] 


Epoch 89:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.11]  


Epoch 90:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.08]  


Epoch 91:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.075] 


Epoch 92:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.03]  


Epoch 93:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.14]  


Epoch 94:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.63]  


Epoch 95:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.62]  


Epoch 96:


 lr 0.000092	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.37]  


Epoch 97:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.6]   


Epoch 98:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.39]  


Epoch 99:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.34]  


Epoch 100:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.95]  


Epoch 101:


 lr 0.000091	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=1.04]  


Epoch 102:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.237] 


Epoch 103:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.234] 


Epoch 104:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.73]  


Epoch 105:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.339] 


Epoch 106:


 lr 0.000091	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.269] 


Epoch 107:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0477]


Epoch 108:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.434] 


Epoch 109:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.438] 


Epoch 110:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0604]


Epoch 111:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.583] 


Epoch 112:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.5]   


Epoch 113:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.47]  


Epoch 114:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.34]  


Epoch 115:


 lr 0.000090	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.09]  


Epoch 116:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.25]  


Epoch 117:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.02]  


Epoch 118:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.67]  


Epoch 119:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.5]   


Epoch 120:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.65]  


Epoch 121:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.6]   


Epoch 122:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.71]  


Epoch 123:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.964] 


Epoch 124:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.175] 


Epoch 125:


 lr 0.000089	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.68]  


Epoch 126:


 lr 0.000088	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.16]  


Epoch 127:


 lr 0.000088	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.03]  


Epoch 128:


 lr 0.000088	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.41]  


Epoch 129:


 lr 0.000088	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.406] 


Epoch 130:


 lr 0.000088	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.49]  


Epoch 131:


 lr 0.000088	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.692] 


Epoch 132:


 lr 0.000088	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.63]  


Epoch 133:


 lr 0.000088	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.284] 


Epoch 134:


 lr 0.000088	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.39]  


Epoch 135:


 lr 0.000088	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.42]  


Epoch 136:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.05]  


Epoch 137:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.847] 


Epoch 138:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.18]  


Epoch 139:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.48]  


Epoch 140:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.56]  


Epoch 141:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.736] 


Epoch 142:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.242] 


Epoch 143:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2]     


Epoch 144:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.293] 


Epoch 145:


 lr 0.000087	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.09]  


Epoch 146:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.238] 


Epoch 147:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.48]  


Epoch 148:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.877] 


Epoch 149:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.348] 


Epoch 150:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.598] 


Epoch 151:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.12]  


Epoch 152:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.592] 


Epoch 153:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0697]


Epoch 154:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.967] 


Epoch 155:


 lr 0.000086	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.91]  


Epoch 156:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.667] 


Epoch 157:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.84]  


Epoch 158:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.38]  


Epoch 159:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.01]  


Epoch 160:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.93]  


Epoch 161:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.01]  


Epoch 162:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.52]  


Epoch 163:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.43]  


Epoch 164:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0695]


Epoch 165:


 lr 0.000085	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.408] 


Epoch 166:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.1]   


Epoch 167:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.89]  


Epoch 168:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.15]  


Epoch 169:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.25]  


Epoch 170:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.72]  


Epoch 171:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0642]


Epoch 172:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.58]  


Epoch 173:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.62]  


Epoch 174:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.07]  


Epoch 175:


 lr 0.000084	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.39]  


Epoch 176:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.26]  


Epoch 177:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.718] 


Epoch 178:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.24]  


Epoch 179:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.68]  


Epoch 180:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.156] 


Epoch 181:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.62]  


Epoch 182:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.393] 


Epoch 183:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.9]   


Epoch 184:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.903] 


Epoch 185:


 lr 0.000083	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.258] 


Epoch 186:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.083] 


Epoch 187:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.071] 


Epoch 188:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.74]  


Epoch 189:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.146] 


Epoch 190:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.532] 


Epoch 191:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.165] 


Epoch 192:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.08]  


Epoch 193:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.09]  


Epoch 194:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.131] 


Epoch 195:


 lr 0.000082	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.151] 


Epoch 196:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.247] 


Epoch 197:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.92]  


Epoch 198:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.45]  


Epoch 199:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.97]  


Epoch 200:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.137] 


Epoch 201:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.512] 


Epoch 202:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.647] 


Epoch 203:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.44]  


Epoch 204:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.53]  


Epoch 205:


 lr 0.000081	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.4]   


Epoch 206:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.32]  


Epoch 207:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.35]  


Epoch 208:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.111] 


Epoch 209:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.196] 


Epoch 210:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.76]  


Epoch 211:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.124] 


Epoch 212:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=4.66]  


Epoch 213:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.27]  


Epoch 214:


 lr 0.000080	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=4.74]  


Epoch 215:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.36]  


Epoch 216:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.927] 


Epoch 217:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.127] 


Epoch 218:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.503] 


Epoch 219:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.96]  


Epoch 220:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.1]   


Epoch 221:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.19]  


Epoch 222:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0745]


Epoch 223:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.97]  


Epoch 224:


 lr 0.000079	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.901] 


Epoch 225:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.395] 


Epoch 226:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.254] 


Epoch 227:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.33]  


Epoch 228:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.53]  


Epoch 229:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0749]


Epoch 230:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.97]  


Epoch 231:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.78]  


Epoch 232:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0579]


Epoch 233:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.402] 


Epoch 234:


 lr 0.000078	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.65]  


Epoch 235:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.471] 


Epoch 236:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.49]  


Epoch 237:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.107] 


Epoch 238:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.686] 


Epoch 239:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.19]  


Epoch 240:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.3]   


Epoch 241:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0632]


Epoch 242:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.83]  


Epoch 243:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.395] 


Epoch 244:


 lr 0.000077	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.64]  


Epoch 245:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.53]  


Epoch 246:


 lr 0.000076	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.991] 


Epoch 247:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.44]  


Epoch 248:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.98]  


Epoch 249:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.11]  


Epoch 250:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.82]  


Epoch 251:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.131] 


Epoch 252:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.849] 


Epoch 253:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.692] 


Epoch 254:


 lr 0.000076	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.61]  


Epoch 255:


 lr 0.000075	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.32]  


Epoch 256:


 lr 0.000075	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0682]


Epoch 257:


 lr 0.000075	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.318] 


Epoch 258:


 lr 0.000075	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.04]  


Epoch 259:


 lr 0.000075	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.739] 


Epoch 260:


 lr 0.000075	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.247] 


Epoch 261:


 lr 0.000075	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.94]  


Epoch 262:


 lr 0.000075	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.74]  


Epoch 263:


 lr 0.000075	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=2.27]  


Epoch 264:


 lr 0.000075	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0822]


Epoch 265:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.21]  


Epoch 266:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.12]  


Epoch 267:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.415] 


Epoch 268:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.557] 


Epoch 269:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.16]  


Epoch 270:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.24]  


Epoch 271:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.466] 


Epoch 272:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.116] 


Epoch 273:


 lr 0.000074	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.447] 


Epoch 274:


 lr 0.000074	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.422] 


Epoch 275:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.284] 


Epoch 276:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.16]  


Epoch 277:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.42]  


Epoch 278:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.79]  


Epoch 279:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.45]  


Epoch 280:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0553]


Epoch 281:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.462] 


Epoch 282:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.135] 


Epoch 283:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.403] 


Epoch 284:


 lr 0.000073	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.151] 


Epoch 285:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.01]  


Epoch 286:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0677]


Epoch 287:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.4]   


Epoch 288:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.09]  


Epoch 289:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0673]


Epoch 290:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.95]  


Epoch 291:


 lr 0.000072	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=1.01]  


Epoch 292:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.23]  


Epoch 293:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.02]  


Epoch 294:


 lr 0.000072	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.89]  


Epoch 295:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.727] 


Epoch 296:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.1]   


Epoch 297:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.215] 


Epoch 298:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.28]  


Epoch 299:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.86]  


Epoch 300:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0777]


Epoch 301:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.65]  


Epoch 302:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.45]  


Epoch 303:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.91]  


Epoch 304:


 lr 0.000071	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.648] 


Epoch 305:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0506]


Epoch 306:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0453]


Epoch 307:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.353] 


Epoch 308:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.77]  


Epoch 309:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.523] 


Epoch 310:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0718]


Epoch 311:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.352] 


Epoch 312:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.44]  


Epoch 313:


 lr 0.000070	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.449] 


Epoch 314:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.135] 


Epoch 315:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.12]  


Epoch 316:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.485] 


Epoch 317:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0814]


Epoch 318:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.111] 


Epoch 319:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.735] 


Epoch 320:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.142] 


Epoch 321:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0785]


Epoch 322:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.68]  


Epoch 323:


 lr 0.000069	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.782] 


Epoch 324:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.582] 


Epoch 325:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.164] 


Epoch 326:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.644] 


Epoch 327:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.187] 


Epoch 328:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.01]  


Epoch 329:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.89]  


Epoch 330:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.116] 


Epoch 331:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.221] 


Epoch 332:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0943]


Epoch 333:


 lr 0.000068	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.31]  


Epoch 334:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.284] 


Epoch 335:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.7]   


Epoch 336:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=4.02]  


Epoch 337:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.48]  


Epoch 338:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.755] 


Epoch 339:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.23]  


Epoch 340:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.13]  


Epoch 341:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.731] 


Epoch 342:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.3]   


Epoch 343:


 lr 0.000067	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.205] 


Epoch 344:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=4.57]  


Epoch 345:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.25]  


Epoch 346:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0642]


Epoch 347:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.139] 


Epoch 348:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.51]  


Epoch 349:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.09]  


Epoch 350:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.07]  


Epoch 351:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.12]  


Epoch 352:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1]     


Epoch 353:


 lr 0.000066	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.1]   


Epoch 354:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.591] 


Epoch 355:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.179] 


Epoch 356:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.912] 


Epoch 357:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.41]  


Epoch 358:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.815] 


Epoch 359:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.944] 


Epoch 360:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.23]  


Epoch 361:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.961] 


Epoch 362:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.1]   


Epoch 363:


 lr 0.000065	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.639] 


Epoch 364:


 lr 0.000064	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.697] 


Epoch 365:


 lr 0.000064	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.108] 


Epoch 366:


 lr 0.000064	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.169] 


Epoch 367:


 lr 0.000064	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.44]  


Epoch 368:


 lr 0.000064	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.76]  


Epoch 369:


 lr 0.000064	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.91]  


Epoch 370:


 lr 0.000064	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.791] 


Epoch 371:


 lr 0.000064	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.67]  


Epoch 372:


 lr 0.000064	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.404] 


Epoch 373:


 lr 0.000064	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.93]  


Epoch 374:


 lr 0.000063	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.69]  


Epoch 375:


 lr 0.000063	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.395] 


Epoch 376:


 lr 0.000063	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.29]  


Epoch 377:


 lr 0.000063	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.19]  


Epoch 378:


 lr 0.000063	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.38]  


Epoch 379:


 lr 0.000063	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.113] 


Epoch 380:


 lr 0.000063	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.48]  


Epoch 381:


 lr 0.000063	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.2]   


Epoch 382:


 lr 0.000063	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0794]


Epoch 383:


 lr 0.000063	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.64]  


Epoch 384:


 lr 0.000062	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.32]  


Epoch 385:


 lr 0.000062	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.63]  


Epoch 386:


 lr 0.000062	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.62]  


Epoch 387:


 lr 0.000062	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2]     


Epoch 388:


 lr 0.000062	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.535] 


Epoch 389:


 lr 0.000062	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.726] 


Epoch 390:


 lr 0.000062	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.5]   


Epoch 391:


 lr 0.000062	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.95]  


Epoch 392:


 lr 0.000062	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.41]  


Epoch 393:


 lr 0.000062	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.357] 


Epoch 394:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.824] 


Epoch 395:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.58]  


Epoch 396:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.232] 


Epoch 397:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.27]  


Epoch 398:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.66]  


Epoch 399:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.369] 


Epoch 400:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.3]   


Epoch 401:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.35]  


Epoch 402:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0859]


Epoch 403:


 lr 0.000061	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.5]   


Epoch 404:


 lr 0.000060	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.154] 


Epoch 405:


 lr 0.000060	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.31]  


Epoch 406:


 lr 0.000060	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0652]


Epoch 407:


 lr 0.000060	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.211] 


Epoch 408:


 lr 0.000060	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.64]  


Epoch 409:


 lr 0.000060	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.973] 


Epoch 410:


 lr 0.000060	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.12]  


Epoch 411:


 lr 0.000060	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.115] 


Epoch 412:


 lr 0.000060	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.672] 


Epoch 413:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.15]  


Epoch 414:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.95]  


Epoch 415:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0588]


Epoch 416:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.37]  


Epoch 417:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.084] 


Epoch 418:


 lr 0.000059	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.119] 


Epoch 419:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.9]   


Epoch 420:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.709] 


Epoch 421:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.88]  


Epoch 422:


 lr 0.000059	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.046] 


Epoch 423:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0419]


Epoch 424:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.813] 


Epoch 425:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.73]  


Epoch 426:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.71]  


Epoch 427:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.01]  


Epoch 428:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.41]  


Epoch 429:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.07]  


Epoch 430:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.95]  


Epoch 431:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.124] 


Epoch 432:


 lr 0.000058	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.57]  


Epoch 433:


 lr 0.000057	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.82]  


Epoch 434:


 lr 0.000057	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.59]  


Epoch 435:


 lr 0.000057	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.39]  


Epoch 436:


 lr 0.000057	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.16]  


Epoch 437:


 lr 0.000057	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.59]  


Epoch 438:


 lr 0.000057	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.819] 


Epoch 439:


 lr 0.000057	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0742]


Epoch 440:


 lr 0.000057	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.854] 


Epoch 441:


 lr 0.000057	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.86]  


Epoch 442:


 lr 0.000057	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.82]  


Epoch 443:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.393] 


Epoch 444:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.08]  


Epoch 445:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.25]  


Epoch 446:


 lr 0.000056	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.38]  


Epoch 447:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.92]  


Epoch 448:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.27]  


Epoch 449:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.44]  


Epoch 450:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0415]


Epoch 451:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.922] 


Epoch 452:


 lr 0.000056	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.44]  


Epoch 453:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.181] 


Epoch 454:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.15]  


Epoch 455:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.09]  


Epoch 456:


 lr 0.000055	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.14]  


Epoch 457:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.656] 


Epoch 458:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0907]


Epoch 459:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.989] 


Epoch 460:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.16]  


Epoch 461:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.79]  


Epoch 462:


 lr 0.000055	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.106] 


Epoch 463:


 lr 0.000054	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.3]   


Epoch 464:


 lr 0.000054	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0623]


Epoch 465:


 lr 0.000054	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.91]  


Epoch 466:


 lr 0.000054	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.03]  


Epoch 467:


 lr 0.000054	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.389] 


Epoch 468:


 lr 0.000054	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.59]  


Epoch 469:


 lr 0.000054	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.55]  


Epoch 470:


 lr 0.000054	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=4.71]  


Epoch 471:


 lr 0.000054	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.703] 


Epoch 472:


 lr 0.000054	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.15]  


Epoch 473:


 lr 0.000053	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.02]  


Epoch 474:


 lr 0.000053	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.17]  


Epoch 475:


 lr 0.000053	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.032] 


Epoch 476:


 lr 0.000053	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.126] 


Epoch 477:


 lr 0.000053	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0814]


Epoch 478:


 lr 0.000053	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.0441]


Epoch 479:


 lr 0.000053	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.206] 


Epoch 480:


 lr 0.000053	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.272] 


Epoch 481:


 lr 0.000053	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.101] 


Epoch 482:


 lr 0.000053	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.104] 


Epoch 483:


 lr 0.000052	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.5]   


Epoch 484:


 lr 0.000052	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.837] 


Epoch 485:


 lr 0.000052	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.365] 


Epoch 486:


 lr 0.000052	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0377]


Epoch 487:


 lr 0.000052	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.77]  


Epoch 488:


 lr 0.000052	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.814] 


Epoch 489:


 lr 0.000052	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.72]  


Epoch 490:


 lr 0.000052	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.76]  


Epoch 491:


 lr 0.000052	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0575]


Epoch 492:


 lr 0.000052	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.694] 


Epoch 493:


 lr 0.000051	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0376]


Epoch 494:


 lr 0.000051	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.72]  


Epoch 495:


 lr 0.000051	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.1]   


Epoch 496:


 lr 0.000051	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.22]  


Epoch 497:


 lr 0.000051	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0379]


Epoch 498:


 lr 0.000051	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.822] 


Epoch 499:


 lr 0.000051	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.18]  


Epoch 500:


 lr 0.000051	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.51]  


Epoch 501:


 lr 0.000051	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.21]  


Epoch 502:


 lr 0.000051	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.29]  


Epoch 503:


 lr 0.000050	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.256] 


Epoch 504:


 lr 0.000050	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.14]  


Epoch 505:


 lr 0.000050	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.07]  


Epoch 506:


 lr 0.000050	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.21]  


Epoch 507:


 lr 0.000050	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.22]  


Epoch 508:


 lr 0.000050	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.85]  


Epoch 509:


 lr 0.000050	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.0507]


Epoch 510:


 lr 0.000050	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.445] 


Epoch 511:


 lr 0.000050	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.65]  


Epoch 512:


 lr 0.000049	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0788]


Epoch 513:


 lr 0.000049	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.76]  


Epoch 514:


 lr 0.000049	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.79]  


Epoch 515:


 lr 0.000049	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.18]  


Epoch 516:


 lr 0.000049	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.73]  


Epoch 517:


 lr 0.000049	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4]     


Epoch 518:


 lr 0.000049	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0324]


Epoch 519:


 lr 0.000049	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.31]  


Epoch 520:


 lr 0.000049	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.78]  


Epoch 521:


 lr 0.000049	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.674] 


Epoch 522:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.21]  


Epoch 523:


 lr 0.000048	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.85]  


Epoch 524:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.964] 


Epoch 525:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.31]  


Epoch 526:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.16]  


Epoch 527:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.42]  


Epoch 528:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.03]  


Epoch 529:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.103] 


Epoch 530:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.13]  


Epoch 531:


 lr 0.000048	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.661] 


Epoch 532:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.89]  


Epoch 533:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.553] 


Epoch 534:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.69]  


Epoch 535:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.64]  


Epoch 536:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0635]


Epoch 537:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.594] 


Epoch 538:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.548] 


Epoch 539:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0429]


Epoch 540:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.753] 


Epoch 541:


 lr 0.000047	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.53]  


Epoch 542:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.575] 


Epoch 543:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.39]  


Epoch 544:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.948] 


Epoch 545:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.65]  


Epoch 546:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.279] 


Epoch 547:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.61]  


Epoch 548:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.04]  


Epoch 549:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.71]  


Epoch 550:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.47]  


Epoch 551:


 lr 0.000046	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.27]  


Epoch 552:


 lr 0.000045	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.922] 


Epoch 553:


 lr 0.000045	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.31]  


Epoch 554:


 lr 0.000045	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.72]  


Epoch 555:


 lr 0.000045	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.346] 


Epoch 556:


 lr 0.000045	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.559] 


Epoch 557:


 lr 0.000045	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.95]  


Epoch 558:


 lr 0.000045	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.69]  


Epoch 559:


 lr 0.000045	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.91]  


Epoch 560:


 lr 0.000045	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.77]  


Epoch 561:


 lr 0.000045	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.39]  


Epoch 562:


 lr 0.000044	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.11]  


Epoch 563:


 lr 0.000044	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.179] 


Epoch 564:


 lr 0.000044	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0634]


Epoch 565:


 lr 0.000044	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.76]  


Epoch 566:


 lr 0.000044	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.154] 


Epoch 567:


 lr 0.000044	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.25]  


Epoch 568:


 lr 0.000044	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.1]   


Epoch 569:


 lr 0.000044	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.253] 


Epoch 570:


 lr 0.000044	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.29]  


Epoch 571:


 lr 0.000044	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.2]   


Epoch 572:


 lr 0.000043	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.145] 


Epoch 573:


 lr 0.000043	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.29]  


Epoch 574:


 lr 0.000043	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.06]  


Epoch 575:


 lr 0.000043	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.856] 


Epoch 576:


 lr 0.000043	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.558] 


Epoch 577:


 lr 0.000043	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=2.3]   


Epoch 578:


 lr 0.000043	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=1.73]  


Epoch 579:


 lr 0.000043	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.9]   


Epoch 580:


 lr 0.000043	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0511]


Epoch 581:


 lr 0.000043	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.47]  


Epoch 582:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.193] 


Epoch 583:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0363]


Epoch 584:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=4.02]  


Epoch 585:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.48]  


Epoch 586:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.205] 


Epoch 587:


 lr 0.000042	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.961] 


Epoch 588:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0371]


Epoch 589:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0946]


Epoch 590:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.01]  


Epoch 591:


 lr 0.000042	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.77]  


Epoch 592:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.946] 


Epoch 593:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.43]  


Epoch 594:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.696] 


Epoch 595:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.351] 


Epoch 596:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.1]   


Epoch 597:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.76]  


Epoch 598:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.414] 


Epoch 599:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0411]


Epoch 600:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.652] 


Epoch 601:


 lr 0.000041	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.213] 


Epoch 602:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.372] 


Epoch 603:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.58]  


Epoch 604:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.159] 


Epoch 605:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.49]  


Epoch 606:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.523] 


Epoch 607:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.72]  


Epoch 608:


 lr 0.000040	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.125] 


Epoch 609:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.05]  


Epoch 610:


 lr 0.000040	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0465]


Epoch 611:


 lr 0.000039	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.687] 


Epoch 612:


 lr 0.000039	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.032] 


Epoch 613:


 lr 0.000039	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.733] 


Epoch 614:


 lr 0.000039	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.0747]


Epoch 615:


 lr 0.000039	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.0481]


Epoch 616:


 lr 0.000039	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.302] 


Epoch 617:


 lr 0.000039	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.735] 


Epoch 618:


 lr 0.000039	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.856] 


Epoch 619:


 lr 0.000039	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.144] 


Epoch 620:


 lr 0.000039	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.511] 


Epoch 621:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.424] 


Epoch 622:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.354] 


Epoch 623:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.659] 


Epoch 624:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.281] 


Epoch 625:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0848]


Epoch 626:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.67]  


Epoch 627:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.77]  


Epoch 628:


 lr 0.000038	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.71]  


Epoch 629:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.673] 


Epoch 630:


 lr 0.000038	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.19]  


Epoch 631:


 lr 0.000037	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.125] 


Epoch 632:


 lr 0.000037	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.38]  


Epoch 633:


 lr 0.000037	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.371] 


Epoch 634:


 lr 0.000037	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0363]


Epoch 635:


 lr 0.000037	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=3.28]  


Epoch 636:


 lr 0.000037	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.782] 


Epoch 637:


 lr 0.000037	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=4.27]  


Epoch 638:


 lr 0.000037	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.28]  


Epoch 639:


 lr 0.000037	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.205] 


Epoch 640:


 lr 0.000037	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0461]


Epoch 641:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.223] 


Epoch 642:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.181] 


Epoch 643:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0599]


Epoch 644:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.835] 


Epoch 645:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.64]  


Epoch 646:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0308]


Epoch 647:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.653] 


Epoch 648:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.879] 


Epoch 649:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.105] 


Epoch 650:


 lr 0.000036	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.76]  


Epoch 651:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.61]  


Epoch 652:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.01]  


Epoch 653:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.35]  


Epoch 654:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0484]


Epoch 655:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.584] 


Epoch 656:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.33]  


Epoch 657:


 lr 0.000035	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=2.71]  


Epoch 658:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.954] 


Epoch 659:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.54]  


Epoch 660:


 lr 0.000035	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.34]  


Epoch 661:


 lr 0.000034	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.569] 


Epoch 662:


 lr 0.000034	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.08]  


Epoch 663:


 lr 0.000034	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.9]   


Epoch 664:


 lr 0.000034	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.77]  


Epoch 665:


 lr 0.000034	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.38]  


Epoch 666:


 lr 0.000034	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.69]  


Epoch 667:


 lr 0.000034	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.703] 


Epoch 668:


 lr 0.000034	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.21]  


Epoch 669:


 lr 0.000034	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.206] 


Epoch 670:


 lr 0.000034	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.57]  


Epoch 671:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0258]


Epoch 672:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.97]  


Epoch 673:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.138] 


Epoch 674:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.596] 


Epoch 675:


 lr 0.000033	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.54]  


Epoch 676:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.23]  


Epoch 677:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.952] 


Epoch 678:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.294] 


Epoch 679:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.26]  


Epoch 680:


 lr 0.000033	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0574]


Epoch 681:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.72]  


Epoch 682:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.7]   


Epoch 683:


 lr 0.000032	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.967] 


Epoch 684:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.707] 


Epoch 685:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.62]  


Epoch 686:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.262] 


Epoch 687:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.61]  


Epoch 688:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.81]  


Epoch 689:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.414] 


Epoch 690:


 lr 0.000032	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.57]  


Epoch 691:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.53]  


Epoch 692:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.266] 


Epoch 693:


 lr 0.000031	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.68]  


Epoch 694:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.149] 


Epoch 695:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.83]  


Epoch 696:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.41]  


Epoch 697:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.969] 


Epoch 698:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.21]  


Epoch 699:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0793]


Epoch 700:


 lr 0.000031	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.27]  


Epoch 701:


 lr 0.000030	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.215] 


Epoch 702:


 lr 0.000030	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.724] 


Epoch 703:


 lr 0.000030	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.122] 


Epoch 704:


 lr 0.000030	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.196] 


Epoch 705:


 lr 0.000030	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.194] 


Epoch 706:


 lr 0.000030	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.99]  


Epoch 707:


 lr 0.000030	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.844] 


Epoch 708:


 lr 0.000030	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.18]  


Epoch 709:


 lr 0.000030	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.93]  


Epoch 710:


 lr 0.000029	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.88]  


Epoch 711:


 lr 0.000029	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.03]  


Epoch 712:


 lr 0.000029	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.227] 


Epoch 713:


 lr 0.000029	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.63]  


Epoch 714:


 lr 0.000029	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.06]  


Epoch 715:


 lr 0.000029	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.629] 


Epoch 716:


 lr 0.000029	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.836] 


Epoch 717:


 lr 0.000029	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.7]   


Epoch 718:


 lr 0.000029	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0359]


Epoch 719:


 lr 0.000029	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.79]  


Epoch 720:


 lr 0.000028	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.384] 


Epoch 721:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.413] 


Epoch 722:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.02]  


Epoch 723:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.34]  


Epoch 724:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.139] 


Epoch 725:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.4]   


Epoch 726:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.94]  


Epoch 727:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.65]  


Epoch 728:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.675] 


Epoch 729:


 lr 0.000028	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.85]  


Epoch 730:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.129] 


Epoch 731:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.502] 


Epoch 732:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.13]  


Epoch 733:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.02]  


Epoch 734:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.13]  


Epoch 735:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.203] 


Epoch 736:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=3.55]  


Epoch 737:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=4.64]  


Epoch 738:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.033] 


Epoch 739:


 lr 0.000027	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=3.77]  


Epoch 740:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.334] 


Epoch 741:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.522] 


Epoch 742:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.08]  


Epoch 743:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.655] 


Epoch 744:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.0469]


Epoch 745:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=3.64]  


Epoch 746:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.64]  


Epoch 747:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.166] 


Epoch 748:


 lr 0.000026	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1]     


Epoch 749:


 lr 0.000026	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.953] 


Epoch 750:


 lr 0.000025	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.271] 


Epoch 751:


 lr 0.000025	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.116] 


Epoch 752:


 lr 0.000025	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=1.74]  


Epoch 753:


 lr 0.000025	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.0704]


Epoch 754:


 lr 0.000025	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.36]  


Epoch 755:


 lr 0.000025	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.962] 


Epoch 756:


 lr 0.000025	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=3.64]  


Epoch 757:


 lr 0.000025	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.256] 


Epoch 758:


 lr 0.000025	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=3.84]  


Epoch 759:


 lr 0.000025	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=1.71]  


Epoch 760:


 lr 0.000024	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.039] 


Epoch 761:


 lr 0.000024	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.0765]


Epoch 762:


 lr 0.000024	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=3.73]  


Epoch 763:


 lr 0.000024	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.172] 


Epoch 764:


 lr 0.000024	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=2.11]  


Epoch 765:


 lr 0.000024	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=3.66]  


Epoch 766:


 lr 0.000024	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=1.4]   


Epoch 767:


 lr 0.000024	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=2.81]  


Epoch 768:


 lr 0.000024	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.0525]


Epoch 769:


 lr 0.000024	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.85]  


Epoch 770:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.0255]


Epoch 771:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.208] 


Epoch 772:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=3.05]  


Epoch 773:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.799] 


Epoch 774:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.0257]


Epoch 775:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.714] 


Epoch 776:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=1.6]   


Epoch 777:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.194] 


Epoch 778:


 lr 0.000023	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=1.74]  


Epoch 779:


 lr 0.000023	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=4.32]  


Epoch 780:


 lr 0.000022	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=1.03]  


Epoch 781:


 lr 0.000022	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=3.95]  


Epoch 782:


 lr 0.000022	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.651] 


Epoch 783:


 lr 0.000022	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.31]  


Epoch 784:


 lr 0.000022	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=3.14]  


Epoch 785:


 lr 0.000022	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.0574]


Epoch 786:


 lr 0.000022	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.528] 


Epoch 787:


 lr 0.000022	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=1.21]  


Epoch 788:


 lr 0.000022	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.264] 


Epoch 789:


 lr 0.000022	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.0448]


Epoch 790:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=1.27]  


Epoch 791:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=2.69]  


Epoch 792:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=3.12]  


Epoch 793:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.0347]


Epoch 794:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.0629]


Epoch 795:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.479] 


Epoch 796:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=4.08]  


Epoch 797:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.373] 


Epoch 798:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.0477]


Epoch 799:


 lr 0.000021	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.798] 


Epoch 800:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.183] 


Epoch 801:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=3.43]  


Epoch 802:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.71]  


Epoch 803:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.462] 


Epoch 804:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=3.54]  


Epoch 805:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.274] 


Epoch 806:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.357] 


Epoch 807:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.418] 


Epoch 808:


 lr 0.000020	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=2.42]  


Epoch 809:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.4]   


Epoch 810:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.173] 


Epoch 811:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=1.59]  


Epoch 812:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=3.48]  


Epoch 813:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=3.24]  


Epoch 814:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.039] 


Epoch 815:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=3.45]  


Epoch 816:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=0.23]  


Epoch 817:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.0363]


Epoch 818:


 lr 0.000019	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.0279]


Epoch 819:


 lr 0.000018	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=2.86]  


Epoch 820:


 lr 0.000018	: 100%|██████████| 494/494 [03:34<00:00,  2.30it/s, Transformer_Loss=2.75]  


Epoch 821:


 lr 0.000018	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.827] 


Epoch 822:


 lr 0.000018	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.424] 


Epoch 823:


 lr 0.000018	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=1.57]  


Epoch 824:


 lr 0.000018	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=3.23]  


Epoch 825:


 lr 0.000018	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=2.18]  


Epoch 826:


 lr 0.000018	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.0949]


Epoch 827:


 lr 0.000018	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.939] 


Epoch 828:


 lr 0.000018	: 100%|██████████| 494/494 [03:34<00:00,  2.31it/s, Transformer_Loss=0.68]  


Epoch 829:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.703] 


Epoch 830:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.802] 


Epoch 831:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.687] 


Epoch 832:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.3]   


Epoch 833:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.36]  


Epoch 834:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.651] 


Epoch 835:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.37]  


Epoch 836:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=0.0262]


Epoch 837:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=1.27]  


Epoch 838:


 lr 0.000017	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.811] 


Epoch 839:


 lr 0.000016	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.98]  


Epoch 840:


 lr 0.000016	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.351] 


Epoch 841:


 lr 0.000016	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.388] 


Epoch 842:


 lr 0.000016	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.3]   


Epoch 843:


 lr 0.000016	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.891] 


Epoch 844:


 lr 0.000016	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.98]  


Epoch 845:


 lr 0.000016	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0805]


Epoch 846:


 lr 0.000016	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.66]  


Epoch 847:


 lr 0.000016	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.06]  


Epoch 848:


 lr 0.000016	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.2]   


Epoch 849:


 lr 0.000015	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.512] 


Epoch 850:


 lr 0.000015	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.773] 


Epoch 851:


 lr 0.000015	: 100%|██████████| 494/494 [03:33<00:00,  2.31it/s, Transformer_Loss=2.59]  


Epoch 852:


 lr 0.000015	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.05]  


Epoch 853:


 lr 0.000015	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.122] 


Epoch 854:


 lr 0.000015	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.22]  


Epoch 855:


 lr 0.000015	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.031] 


Epoch 856:


 lr 0.000015	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.28]  


Epoch 857:


 lr 0.000015	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.183] 


Epoch 858:


 lr 0.000015	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.77]  


Epoch 859:


 lr 0.000014	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.301] 


Epoch 860:


 lr 0.000014	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.594] 


Epoch 861:


 lr 0.000014	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.15]  


Epoch 862:


 lr 0.000014	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.89]  


Epoch 863:


 lr 0.000014	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.09]  


Epoch 864:


 lr 0.000014	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.166] 


Epoch 865:


 lr 0.000014	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.2]   


Epoch 866:


 lr 0.000014	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0519]


Epoch 867:


 lr 0.000014	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.285] 


Epoch 868:


 lr 0.000014	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.0402]


Epoch 869:


 lr 0.000013	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.44]  


Epoch 870:


 lr 0.000013	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.6]   


Epoch 871:


 lr 0.000013	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.03]  


Epoch 872:


 lr 0.000013	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.617] 


Epoch 873:


 lr 0.000013	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.31]  


Epoch 874:


 lr 0.000013	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.375] 


Epoch 875:


 lr 0.000013	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.815] 


Epoch 876:


 lr 0.000013	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.16]  


Epoch 877:


 lr 0.000013	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.688] 


Epoch 878:


 lr 0.000013	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.16]  


Epoch 879:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.122] 


Epoch 880:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.89]  


Epoch 881:


 lr 0.000012	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.67]  


Epoch 882:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.581] 


Epoch 883:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.57]  


Epoch 884:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.693] 


Epoch 885:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.83]  


Epoch 886:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0459]


Epoch 887:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.422] 


Epoch 888:


 lr 0.000012	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.63]  


Epoch 889:


 lr 0.000011	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.18]  


Epoch 890:


 lr 0.000011	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.55]  


Epoch 891:


 lr 0.000011	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=3.64]  


Epoch 892:


 lr 0.000011	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.264] 


Epoch 893:


 lr 0.000011	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=1.66]  


Epoch 894:


 lr 0.000011	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.184] 


Epoch 895:


 lr 0.000011	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.95]  


Epoch 896:


 lr 0.000011	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.14]  


Epoch 897:


 lr 0.000011	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.09]  


Epoch 898:


 lr 0.000011	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.118] 


Epoch 899:


 lr 0.000010	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.0413]


Epoch 900:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.1]   


Epoch 901:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.5]   


Epoch 902:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.731] 


Epoch 903:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.565] 


Epoch 904:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.543] 


Epoch 905:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.12]  


Epoch 906:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.03]  


Epoch 907:


 lr 0.000010	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=4.52]  


Epoch 908:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.0398]


Epoch 909:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.125] 


Epoch 910:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.921] 


Epoch 911:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.218] 


Epoch 912:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.94]  


Epoch 913:


 lr 0.000009	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.59]  


Epoch 914:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.81]  


Epoch 915:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.65]  


Epoch 916:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.29]  


Epoch 917:


 lr 0.000009	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.32]  


Epoch 918:


 lr 0.000008	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.545] 


Epoch 919:


 lr 0.000008	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.57]  


Epoch 920:


 lr 0.000008	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0898]


Epoch 921:


 lr 0.000008	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.353] 


Epoch 922:


 lr 0.000008	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.258] 


Epoch 923:


 lr 0.000008	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=3.22]  


Epoch 924:


 lr 0.000008	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.358] 


Epoch 925:


 lr 0.000008	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.025] 


Epoch 926:


 lr 0.000008	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=0.0247]


Epoch 927:


 lr 0.000008	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.667] 


Epoch 928:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0645]


Epoch 929:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.01]  


Epoch 930:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0976]


Epoch 931:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.724] 


Epoch 932:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.557] 


Epoch 933:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.858] 


Epoch 934:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.3]   


Epoch 935:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.203] 


Epoch 936:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.76]  


Epoch 937:


 lr 0.000007	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.441] 


Epoch 938:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=1.76]  


Epoch 939:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.147] 


Epoch 940:


 lr 0.000006	: 100%|██████████| 494/494 [03:33<00:00,  2.32it/s, Transformer_Loss=2.1]   


Epoch 941:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.38]  


Epoch 942:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.12]  


Epoch 943:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.862] 


Epoch 944:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.52]  


Epoch 945:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.18]  


Epoch 946:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=4.04]  


Epoch 947:


 lr 0.000006	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.546] 


Epoch 948:


 lr 0.000005	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.68]  


Epoch 949:


 lr 0.000005	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.24]  


Epoch 950:


 lr 0.000005	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=3.53]  


Epoch 951:


 lr 0.000005	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.78]  


Epoch 952:


 lr 0.000005	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.153] 


Epoch 953:


 lr 0.000005	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.299] 


Epoch 954:


 lr 0.000005	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.847] 


Epoch 955:


 lr 0.000005	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.917] 


Epoch 956:


 lr 0.000005	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.198] 


Epoch 957:


 lr 0.000005	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.946] 


Epoch 958:


 lr 0.000004	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.0297]


Epoch 959:


 lr 0.000004	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.348] 


Epoch 960:


 lr 0.000004	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=2.17]  


Epoch 961:


 lr 0.000004	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.13]  


Epoch 962:


 lr 0.000004	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=0.929] 


Epoch 963:


 lr 0.000004	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=2.92]  


Epoch 964:


 lr 0.000004	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.66]  


Epoch 965:


 lr 0.000004	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.26]  


Epoch 966:


 lr 0.000004	: 100%|██████████| 494/494 [03:32<00:00,  2.32it/s, Transformer_Loss=0.417] 


Epoch 967:


 lr 0.000004	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=3.73]  


Epoch 968:


 lr 0.000003	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=1.53]  


Epoch 969:


 lr 0.000003	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.396] 


Epoch 970:


 lr 0.000003	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.8]   


Epoch 971:


 lr 0.000003	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.716] 


Epoch 972:


 lr 0.000003	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=1.94]  


Epoch 973:


 lr 0.000003	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.234] 


Epoch 974:


 lr 0.000003	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=3.72]  


Epoch 975:


 lr 0.000003	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.165] 


Epoch 976:


 lr 0.000003	: 100%|██████████| 494/494 [03:32<00:00,  2.33it/s, Transformer_Loss=2.53]  


Epoch 977:


 lr 0.000003	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.313] 


Epoch 978:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=1.24]  


Epoch 979:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=1.94]  


Epoch 980:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=2.45]  


Epoch 981:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.537] 


Epoch 982:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=2.51]  


Epoch 983:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=3.99]  


Epoch 984:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.506] 


Epoch 985:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=0.0256]


Epoch 986:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=2.32]  


Epoch 987:


 lr 0.000002	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=3.09]  


Epoch 988:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=1.49]  


Epoch 989:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=0.836] 


Epoch 990:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=0.279] 


Epoch 991:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=3.01]  


Epoch 992:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.33it/s, Transformer_Loss=0.999] 


Epoch 993:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=0.434] 


Epoch 994:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=0.169] 


Epoch 995:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=1.32]  


Epoch 996:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=3.29]  


Epoch 997:


 lr 0.000001	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=4.33]  


Epoch 998:


 lr 0.000000	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=0.285] 


Epoch 999:


 lr 0.000000	: 100%|██████████| 494/494 [03:30<00:00,  2.34it/s, Transformer_Loss=3.28]  


Epoch 1000:


 lr 0.000000	: 100%|██████████| 494/494 [03:31<00:00,  2.34it/s, Transformer_Loss=0.534] 
