In [1]:
import torch
import numpy as np
import random
import sentencepiece as spm
from transformers import ReformerConfig, ReformerTokenizer, ReformerModel, PreTrainedModel
from torch.utils.data import DataLoader, Dataset


NUM_BATCHES = None
BATCH_SIZE = 6
GRADIENT_ACCUMULATE_EVERY = 3
LEARNING_RATE = 0.01 #1e-4
VALIDATE_EVERY  = 12
SEQ_LEN = 4608 # 4608/64=72, 4608=64*72

In [2]:
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x1669f23c070>

In [3]:
# def split_file(file,out1,out2,percentage=0.75,isShuffle=True,seed=42):
#     """quora.com/How-can-split-a-text-file-randomly-in-75-and-25-and-create-two-output-file-in-python
#     """
#     random.seed(seed)
#     with open(file, 'r',encoding="utf-8") as fin, open(out1, 'w') as foutBig, open(out2, 'w') as foutSmall:
#         nLines = sum(1 for line in fin)
#         fin.seek(0)

#         nTrain = int(nLines*percentage) 
#         nValid = nLines - nTrain

#         i = 0
#         for line in fin:
#             r = random.random() if isShuffle else 0 # so that always evaluated to true when not isShuffle
#             if (i < nTrain and r < percentage) or (nLines - i > nValid):
#                 foutBig.write(line)
#                 i += 1
#             else:
#                 foutSmall.write(line)
                
# split_file("data/yeast/yeast.txt", 
#            "data/yeast/yeast_train.txt",
#            "data/yeast/yeast_val.txt",
#            percentage=0.9)

In [3]:
spm.SentencePieceTrainer.Train("--input=./data/tokenizer_training/AAresiduals.txt \
                                --vocab_size=28 \
                                --model_prefix=sequence_tokenizer \
                                --model_type=char \
                                --character_coverage=1.0")
tokenizer = ReformerTokenizer(vocab_file="sequence_tokenizer.model", do_lower_case=False, model_max_length=SEQ_LEN)

In [96]:
# tokenizer.save_vocabulary('./')

In [97]:
tokenizer.all_special_tokens, tokenizer.all_special_ids, tokenizer.vocab_size

(['<unk>', '<pad>', '</s>'], [0, 0, 2], 28)

In [104]:
[(v, tokenizer.decode(v)) for i, v in enumerate(range(28))]

[(0, '<unk>'),
 (1, '<s>'),
 (2, '</s>'),
 (3, 'A'),
 (4, 'B'),
 (5, 'C'),
 (6, 'D'),
 (7, 'E'),
 (8, 'F'),
 (9, 'G'),
 (10, 'H'),
 (11, 'I'),
 (12, 'K'),
 (13, 'L'),
 (14, 'M'),
 (15, 'N'),
 (16, 'O'),
 (17, 'P'),
 (18, 'Q'),
 (19, 'R'),
 (20, 'S'),
 (21, 'T'),
 (22, 'U'),
 (23, 'V'),
 (24, 'W'),
 (25, 'X'),
 (26, 'Y'),
 (27, 'Z')]

In [117]:
print(tokenizer.bos_token, tokenizer.bos_token_id, tokenizer.eos_token, tokenizer.eos_token_id,
tokenizer.unk_token, tokenizer.unk_token_id, tokenizer.pad_token, tokenizer.pad_token_id,
tokenizer.mask_token, tokenizer.mask_token_id)

Using mask_token, but it is not set yet.
Using mask_token, but it is not set yet.


<s> 1 </s> 2 <unk> 0 <pad> 0 None None


In [118]:
# tokenizer._bos_token = '<s>'
# # tokenizer._bos_token_id = 1
# tokenizer._eos_token = '</s>'
# # tokenizer._eos_token_id = 2
# tokenizer._unk_token = '<unk>'
# # tokenizer._unk_token_id = 3
# tokenizer._pad_token = '<pad>'
# # tokenizer._pad_token_id = 0
# tokenizer._mask_token = '<mask>'
# # tokenizer._mask_token_id = 4


In [119]:
tokenizer.all_special_ids

[2, 1, 0, 0, 0]

In [120]:
tokenizer.all_special_tokens

['</s>', '<s>', '<mask>', '<unk>', '<pad>']

In [124]:
tokenizer.batch_encode_plus(['AB<mask>CD', 
                             'BBCD', 
                             'CBCD'], add_special_tokens=True, pad_to_max_length=True, max_length=10)

{'input_ids': [[0, 3, 4, 0, 5, 6, 0, 0, 0, 0], [0, 4, 4, 5, 6, 0, 0, 0, 0, 0], [0, 5, 4, 5, 6, 0, 0, 0, 0, 0]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]]}

In [7]:
# with open('data/yeast/yeast.txt') as file:
#     X = [l.strip() for l in file]
#     y = [len(l) for l in X]
# print(max(y))

In [8]:
class SequenceDataset(Dataset):
    def __init__(self, masked_input_ids, masked_labels, attention_mask, tokenizer, _len):
        super().__init__()
        self.input_ids = masked_input_ids
        self.labels = masked_labels
        self.attention_mask = attention_mask
        self.tokenizer = tokenizer
        self._len = _len

    @classmethod
    def prepare_from_file(cls, file_path, tokenizer):
        with open(file_path) as file:
            X = [l.strip() for l in file]
            # cut sequence into certain length (e.g. if max length is 10, cut to 10)
            X = [tokenizer.encode(sequence)[1:tokenizer.max_len+1] for sequence in X]
            temp = [cls.mask_random(sequence) for sequence in X]
            masked_sequences = [t[0] for t in temp]
            masked_labels = [t[1] for t in temp]
            temp = [tokenizer.prepare_for_model(masked_sequence, 
                                                max_length=tokenizer.max_len,
                                                pad_to_max_length=True) for masked_sequence in masked_sequences]
            masked_labels = [tokenizer.prepare_for_model(masked_label,
                                                         max_length=tokenizer.max_len,
                                                         pad_to_max_length=True)["input_ids"] for masked_label in masked_labels]
            masked_input_ids = [torch.tensor(l["input_ids"]) for l in temp]
            masked_labels = [torch.tensor(masked_label) for masked_label in masked_labels]
            attention_mask = [torch.tensor(l["attention_mask"]) for l in temp]
            del(temp)
        masked_input_ids = torch.stack([masked_input_ids[i] for i in range(len(masked_input_ids))]).squeeze()
        masked_labels = torch.stack([masked_labels[i] for i in range(len(masked_labels))]).squeeze()
        attention_mask = torch.stack([attention_mask[i] for i in range(len(attention_mask))]).squeeze()
        
        return cls(masked_input_ids, masked_labels, attention_mask, tokenizer, len(masked_input_ids))
    
    @staticmethod
    def mask_random(sequence):
        masked_sequence = [residual for residual in sequence]
        masked_labels = []
        
        for i, residual in enumerate(masked_sequence):
            prob = random.random()
            if prob < 0.15:
                prob /= 0.15
                if prob < 0.8:
                    masked_sequence[i] = 0
                elif prob < 0.9:
                    masked_sequence[i] = random.randrange(tokenizer.vocab_size)
                masked_labels.append(residual)
            else:
#                 masked_labels.append(-100)
                masked_labels.append(0)
        return masked_sequence, masked_labels

    def __getitem__(self, index):
        return self.input_ids[index, ].cuda(), self.labels[index, ].cuda(), self.attention_mask[index, ].cuda()

    def __len__(self):
        return self._len

In [9]:
# x = SequenceDataset.prepare_from_file("data/yeast/yeast.txt", tokenizer)

In [10]:
# x.labels, x.attention_mask, x.input_ids, x.labels.shape, x.attention_mask.shape, x.input_ids.shape

In [11]:
import torch
from torch import nn
from torch.nn import CrossEntropyLoss
from transformers.modeling_reformer import ACT2FN, ReformerModel, PreTrainedModel, ReformerPreTrainedModel
ReformerLayerNorm = torch.nn.LayerNorm
ACT2FN = {"relu": torch.nn.functional.relu}


class ReformerMLMHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        
        self.dense = nn.Linear(config.hidden_size * 2, config.hidden_size)
        if isinstance(config.hidden_act, str):
            self.transform_act_fn = ACT2FN[config.hidden_act]
        else:
            self.transform_act_fn = config.hidden_act
        self.LayerNorm = ReformerLayerNorm(config.hidden_size, eps=config.layer_norm_eps)

        self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
#         print(self.decoder)
        self.bias = nn.Parameter(torch.zeros(config.vocab_size))
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.transform_act_fn(hidden_states)
        hidden_states = self.LayerNorm(hidden_states)
        logits = self.decoder(hidden_states)
        return logits


class ReformerMLM(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.reformer = ReformerModel(config)
        self.mlm = ReformerMLMHead(config)
        
    def forward(self, input_ids=None, attention_mask=None, position_ids=None, head_mask=None, 
                inputs_embeds=None, labels=None, output_attentions=None, **kwargs):
        reformer_outputs = self.reformer(input_ids, position_ids=position_ids, attention_mask=attention_mask,
                                          head_mask=head_mask, inputs_embeds=inputs_embeds, num_hashes=None)

        hidden_states = reformer_outputs[0]
#         print("output shape of the ReformerModel: {}".format(hidden_states.shape))
        logits = self.mlm(hidden_states)
#         print("logits shape: {}".format(logits.shape))
        outputs = (logits,) + reformer_outputs[1:]

        if labels is not None:
            loss_fct = CrossEntropyLoss(ignore_index=0)  # -100 index = padding token
            masked_lm_loss = loss_fct(logits.view(-1, self.config.vocab_size), labels.view(-1))
            outputs = (masked_lm_loss,) + outputs
        return outputs  # (lm_loss), lm_logits, (hidden_states), (attentions)
    


In [12]:
tokenizer.vocab_size

28

In [13]:
configuration = ReformerConfig()
configuration.axial_pos_shape = (64, 72)
configuration.max_position_embeddings = SEQ_LEN # 4608
configuration.vocab_size = tokenizer.vocab_size # 28
configuration.architectures=None
configuration.is_decoder=False
configuration.hidden_size=256 # ReformerModel output size = 512
configuration.save_pretrained('model/fix_masked_LM_config/')
configuration = ReformerConfig.from_pretrained('model/fix_masked_LM_config/')

model = ReformerMLM(configuration)

In [14]:
# lab_x=torch.zeros((2,4608), dtype=torch.int64)

In [15]:
# lab_x

In [16]:
# outputs = model(input_ids=inp, attention_mask=att, labels=lab)
# When training a model from scratch, it is recommended to leave ``config.num_buckets=None``, 
# so that depending on the sequence length a good value for ``num_buckets`` is calculated on the fly. 
# This value will then automatically be saved in the config and should be reused for inference.

In [17]:
train_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_train.txt", tokenizer)
val_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_val.txt", tokenizer)

Token indices sequence length is longer than the specified maximum sequence length for this model (4911 > 4608). Running this sequence through the model will result in indexing errors


In [18]:
# train_dataset.labels

In [19]:
def cycle(loader):
    while True:
        for data in loader:
            yield data
            
def collate_fn(batch):
    """
    batch:[(inputs, labels, mask),...]
    [(tensor([14,  3, 11, 0,  0], device='cuda:0'), 
      tensor([0, 0, 0, 0, 0], device='cuda:0'), 
      tensor([1, 1, 1, 0, 0], device='cuda:0')), ...]
    """
    inputs = torch.stack([x[0] for x in batch])
    labels = torch.stack([x[1] for x in batch])
    attention_mask = torch.stack([x[2] for x in batch])

    output = {"inputs": inputs,
              "labels": labels,
              "attention_mask": attention_mask}

    return output

train_loader = cycle(DataLoader(train_dataset, collate_fn=collate_fn, batch_size=BATCH_SIZE))
val_loader = cycle(DataLoader(val_dataset, collate_fn=collate_fn, batch_size=BATCH_SIZE))

In [20]:
next(train_loader)

{'inputs': tensor([[14,  0, 11,  ...,  0,  0,  0],
         [14, 26, 26,  ...,  0,  0,  0],
         [14, 20,  3,  ...,  0,  0,  0],
         [14, 15,  0,  ...,  0,  0,  0],
         [14, 10, 21,  ...,  0,  0,  0],
         [14, 11,  0,  ...,  0,  0,  0]], device='cuda:0'),
 'labels': tensor([[ 0,  3,  0,  ...,  0,  0,  0],
         [ 0,  0,  0,  ...,  0,  0,  0],
         [ 0,  0,  7,  ...,  0,  0,  0],
         [ 0,  0, 21,  ...,  0,  0,  0],
         [ 0,  0,  0,  ...,  0,  0,  0],
         [ 0,  0, 19,  ...,  0,  0,  0]], device='cuda:0'),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]], device='cuda:0')}

In [21]:
from transformers import AdamW
optimizer = AdamW(params=model.parameters(), lr=LEARNING_RATE)
NUM_BATCHES = len(train_dataset)//BATCH_SIZE

model.cuda()

ReformerMLM(
  (reformer): ReformerModel(
    (embeddings): ReformerEmbeddings(
      (word_embeddings): Embedding(28, 256)
      (position_embeddings): AxialPositionEmbeddings(
        (weights): ParameterList(
            (0): Parameter containing: [torch.cuda.FloatTensor of size 64x1x64 (GPU 0)]
            (1): Parameter containing: [torch.cuda.FloatTensor of size 1x72x192 (GPU 0)]
        )
      )
    )
    (encoder): ReformerEncoder(
      (layers): ModuleList(
        (0): ReformerLayer(
          (attention): ReformerAttention(
            (layer_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
            (self_attention): LocalSelfAttention(
              (query): Linear(in_features=256, out_features=128, bias=False)
              (key): Linear(in_features=256, out_features=128, bias=False)
              (value): Linear(in_features=256, out_features=128, bias=False)
            )
            (output): ReformerSelfOutput(
              (dense): Linear(in_features=

In [23]:
import tqdm
for x in range(5):
    for i in tqdm.tqdm(range(NUM_BATCHES), mininterval=10., desc='training'):

        model.train()

        for __ in range(GRADIENT_ACCUMULATE_EVERY):
            tmp = next(train_loader)
            inputs = tmp['inputs']
            labels = tmp['labels']
            attention_mask = tmp['attention_mask']

            outputs = model(input_ids=inputs, labels=labels, attention_mask=attention_mask)
            loss, prediction_scores = outputs[:2]
            loss.backward()

        print(f'training loss: {loss.item()}')

        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)

        optimizer.step()
        optimizer.zero_grad()

        if i % VALIDATE_EVERY == 0:
            model.eval()
            with torch.no_grad():
                tmp = next(train_loader)
                inputs = tmp['inputs']
                labels = tmp['labels']
                attention_mask = tmp['attention_mask']
                outputs = model(input_ids=inputs, labels=labels, attention_mask=attention_mask)
                loss, prediction_scores = outputs[:2]
                print(f'validation loss: {loss.item()}')

training:   0%|                                                                                                                                  | 0/907 [00:00<?, ?it/s]

training loss: 2.9347362518310547
validation loss: 2.8914644718170166
training loss: 2.8864529132843018
training loss: 2.9085187911987305
training loss: 2.9765799045562744
training loss: 2.9061970710754395
training loss: 2.887698173522949
training loss: 2.9558684825897217
training loss: 2.8694169521331787
training loss: 2.8877532482147217
training loss: 2.901780843734741
training loss: 2.9264981746673584
training loss: 2.887889862060547
training loss: 2.864699602127075
validation loss: 2.9043123722076416
training loss: 2.8842380046844482


training:   2%|██                                                                                                                       | 15/907 [00:10<10:35,  1.40it/s]

training loss: 2.868661880493164
training loss: 2.8706462383270264
training loss: 2.8996357917785645
training loss: 2.8958234786987305
training loss: 2.864328622817993
training loss: 2.8696482181549072
training loss: 2.8836920261383057
training loss: 2.8888814449310303
training loss: 2.9204964637756348
training loss: 2.9000773429870605
training loss: 2.880237579345703
validation loss: 2.902726888656616
training loss: 2.865835666656494
training loss: 2.8833723068237305
training loss: 2.9310190677642822
training loss: 2.8786799907684326


training:   3%|████                                                                                                                     | 30/907 [00:21<10:23,  1.41it/s]

training loss: 2.847700595855713
training loss: 2.924265146255493
training loss: 2.877284288406372
training loss: 2.902371644973755
training loss: 2.894578218460083
training loss: 2.8695082664489746
training loss: 2.924144744873047
training loss: 2.9155304431915283
validation loss: 2.894442558288574
training loss: 2.899120807647705
training loss: 2.8512346744537354
training loss: 2.8849799633026123
training loss: 2.9083662033081055
training loss: 2.8672235012054443
training loss: 2.8835690021514893
training loss: 2.922081232070923


training:   5%|██████                                                                                                                   | 45/907 [00:31<10:13,  1.41it/s]

training loss: 2.8649799823760986
training loss: 2.856004238128662
training loss: 2.861687660217285
training loss: 2.853656053543091
training loss: 2.886746406555176
validation loss: 2.8759186267852783
training loss: 2.929192066192627
training loss: 2.866621255874634
training loss: 2.9004440307617188
training loss: 2.872728109359741
training loss: 2.8837497234344482
training loss: 2.8771016597747803
training loss: 2.8964202404022217
training loss: 2.87721848487854
training loss: 2.937598705291748
training loss: 2.8856818675994873


training:   7%|████████                                                                                                                 | 60/907 [00:42<10:04,  1.40it/s]

training loss: 2.8714096546173096
training loss: 2.835144281387329
validation loss: 2.9161300659179688
training loss: 2.886322021484375
training loss: 2.8705310821533203
training loss: 2.900301933288574
training loss: 2.8980937004089355
training loss: 2.865022897720337
training loss: 2.8753838539123535
training loss: 2.8814570903778076
training loss: 2.8658511638641357
training loss: 2.8632335662841797
training loss: 2.8816146850585938
training loss: 2.8796114921569824
training loss: 2.8827555179595947
validation loss: 2.9571144580841064


training:   8%|█████████▊                                                                                                               | 74/907 [00:52<09:55,  1.40it/s]

training loss: 2.8539116382598877
training loss: 2.9180045127868652
training loss: 2.849268913269043
training loss: 2.871344804763794
training loss: 2.88881254196167
training loss: 2.890535593032837
training loss: 2.880772829055786
training loss: 2.90454363822937
training loss: 2.8628382682800293
training loss: 2.906559944152832
training loss: 2.8827364444732666
training loss: 2.8775224685668945
validation loss: 2.9215610027313232
training loss: 2.906057119369507
training loss: 2.8721837997436523
training loss: 2.912743330001831


training:  10%|███████████▊                                                                                                             | 89/907 [01:03<09:43,  1.40it/s]

training loss: 2.9422638416290283
training loss: 2.8669378757476807
training loss: 2.849383592605591
training loss: 2.878859758377075
training loss: 2.901517152786255
training loss: 2.9128053188323975
training loss: 2.9047956466674805
training loss: 2.916076421737671
training loss: 2.9119999408721924
validation loss: 2.9023263454437256
training loss: 2.896080255508423
training loss: 2.8885042667388916
training loss: 2.842970371246338
training loss: 2.857142686843872
training loss: 2.897292137145996
training loss: 2.905569076538086


training:  11%|█████████████▊                                                                                                          | 104/907 [01:14<09:30,  1.41it/s]

training loss: 2.9025933742523193
training loss: 2.849635601043701
training loss: 2.9004812240600586
training loss: 2.911926507949829
training loss: 2.898181915283203
training loss: 2.947693109512329
validation loss: 2.8743631839752197
training loss: 2.9150660037994385
training loss: 2.873253107070923
training loss: 2.8557796478271484
training loss: 2.934171199798584
training loss: 2.88273286819458
training loss: 2.8804688453674316
training loss: 2.88053560256958
training loss: 2.8542871475219727
training loss: 2.8862693309783936


training:  13%|███████████████▋                                                                                                        | 119/907 [01:24<09:19,  1.41it/s]

training loss: 2.896754026412964
training loss: 2.90195369720459
training loss: 2.909564256668091
validation loss: 2.9214515686035156
training loss: 2.911952495574951
training loss: 2.8681814670562744
training loss: 2.8651652336120605
training loss: 2.9110333919525146
training loss: 2.91870379447937
training loss: 2.847640037536621
training loss: 2.938741683959961
training loss: 2.9075398445129395
training loss: 2.8609447479248047
training loss: 2.865391969680786
training loss: 2.8607661724090576
training loss: 2.893803119659424
validation loss: 2.8787293434143066


training:  15%|█████████████████▋                                                                                                      | 134/907 [01:35<09:08,  1.41it/s]

training loss: 2.8678770065307617
training loss: 2.8824005126953125
training loss: 2.8611109256744385
training loss: 2.8552329540252686
training loss: 2.827334403991699
training loss: 2.9306797981262207
training loss: 2.847677230834961
training loss: 2.895357608795166
training loss: 2.901648998260498
training loss: 2.8946151733398438
training loss: 2.8548483848571777
training loss: 2.9033608436584473
validation loss: 2.888388156890869
training loss: 2.8675787448883057
training loss: 2.896497964859009
training loss: 2.8487422466278076


training:  16%|███████████████████▋                                                                                                    | 149/907 [01:45<08:57,  1.41it/s]

training loss: 2.8625714778900146
training loss: 2.874403238296509
training loss: 2.8437561988830566
training loss: 2.937284469604492
training loss: 2.8586597442626953
training loss: 2.893099069595337
training loss: 2.924612045288086
training loss: 2.8906867504119873
training loss: 2.9532203674316406
validation loss: 2.9010837078094482
training loss: 2.8838138580322266
training loss: 2.932622194290161
training loss: 2.8832175731658936
training loss: 2.8795671463012695
training loss: 2.8967018127441406
training loss: 2.858079195022583


training:  18%|█████████████████████▋                                                                                                  | 164/907 [01:56<08:47,  1.41it/s]

training loss: 2.924325704574585
training loss: 2.922121047973633
training loss: 2.895848274230957
training loss: 2.854299783706665
training loss: 2.8800668716430664
training loss: 2.8614566326141357
validation loss: 2.8268139362335205
training loss: 2.895054578781128
training loss: 2.911672830581665
training loss: 2.874691963195801
training loss: 2.899772882461548
training loss: 2.859387159347534
training loss: 2.8704607486724854
training loss: 2.8918614387512207
training loss: 2.8972363471984863
training loss: 2.9347431659698486


training:  20%|███████████████████████▋                                                                                                | 179/907 [02:07<08:36,  1.41it/s]

training loss: 2.899592161178589
training loss: 2.8629820346832275
training loss: 2.8728816509246826
validation loss: 2.914130687713623
training loss: 2.861083745956421
training loss: 2.9076218605041504
training loss: 2.8704099655151367
training loss: 2.875645399093628
training loss: 2.8945133686065674
training loss: 2.8995168209075928
training loss: 2.899543523788452
training loss: 2.924506664276123
training loss: 2.874828577041626
training loss: 2.8756303787231445
training loss: 2.8736085891723633
training loss: 2.8958306312561035
validation loss: 2.885456085205078


training:  21%|█████████████████████████▋                                                                                              | 194/907 [02:17<08:25,  1.41it/s]

training loss: 2.9074981212615967
training loss: 2.8591644763946533
training loss: 2.8987717628479004
training loss: 2.9070537090301514
training loss: 2.9071803092956543
training loss: 2.894350528717041
training loss: 2.9286580085754395
training loss: 2.8765504360198975
training loss: 2.9156923294067383
training loss: 2.8886184692382812
training loss: 2.8122551441192627
training loss: 2.8819146156311035
validation loss: 2.8798091411590576
training loss: 2.896183967590332
training loss: 2.887263059616089
training loss: 2.8860864639282227


training:  23%|███████████████████████████▋                                                                                            | 209/907 [02:28<08:14,  1.41it/s]

training loss: 2.8707966804504395
training loss: 2.883716106414795
training loss: 2.8687214851379395
training loss: 2.9308419227600098
training loss: 2.890084743499756
training loss: 2.90228533744812
training loss: 2.8821778297424316
training loss: 2.883087396621704
training loss: 2.8955912590026855
validation loss: 2.8705995082855225
training loss: 2.9031717777252197
training loss: 2.8128459453582764
training loss: 2.853297710418701
training loss: 2.9060964584350586
training loss: 2.8926048278808594
training loss: 2.8346033096313477


training:  25%|█████████████████████████████▋                                                                                          | 224/907 [02:39<08:04,  1.41it/s]

training loss: 2.8749306201934814
training loss: 2.8479247093200684
training loss: 2.8319244384765625
training loss: 2.8775389194488525
training loss: 2.895679473876953
training loss: 2.880382776260376
validation loss: 2.8484411239624023
training loss: 2.8890955448150635
training loss: 2.853978157043457
training loss: 2.9091622829437256
training loss: 2.9038734436035156
training loss: 2.9090230464935303
training loss: 2.8851799964904785
training loss: 2.862745523452759
training loss: 2.87302303314209
training loss: 2.8797192573547363


training:  26%|███████████████████████████████▌                                                                                        | 239/907 [02:49<07:53,  1.41it/s]

training loss: 2.8799915313720703
training loss: 2.892376184463501
training loss: 2.902576208114624
validation loss: 2.9031925201416016
training loss: 2.894666910171509
training loss: 2.88230299949646
training loss: 2.862184524536133
training loss: 2.8651554584503174
training loss: 2.897094488143921
training loss: 2.8640618324279785
training loss: 2.852170705795288
training loss: 2.8973236083984375
training loss: 2.874345064163208
training loss: 2.866530179977417
training loss: 2.8682737350463867
training loss: 2.8807027339935303
validation loss: 2.8758318424224854


training:  28%|█████████████████████████████████▌                                                                                      | 254/907 [03:00<07:43,  1.41it/s]

training loss: 2.8820931911468506
training loss: 2.8904953002929688
training loss: 2.910465717315674
training loss: 2.851043224334717
training loss: 2.9240500926971436
training loss: 2.8902528285980225
training loss: 2.8744418621063232
training loss: 2.945110559463501
training loss: 2.858234167098999
training loss: 2.8869030475616455
training loss: 2.884450912475586
training loss: 2.9455323219299316
validation loss: 2.871678590774536
training loss: 2.9141383171081543
training loss: 2.883186101913452
training loss: 2.857494831085205


training:  30%|███████████████████████████████████▌                                                                                    | 269/907 [03:11<07:34,  1.40it/s]

training loss: 2.8718879222869873
training loss: 2.8941807746887207
training loss: 2.9349188804626465
training loss: 2.8556134700775146
training loss: 2.909193277359009
training loss: 2.936206817626953
training loss: 2.9081077575683594
training loss: 2.861456871032715
training loss: 2.9075329303741455
validation loss: 2.902623176574707
training loss: 2.8709311485290527
training loss: 2.9586129188537598
training loss: 2.872211456298828
training loss: 2.869378089904785
training loss: 2.868372678756714
training loss: 2.910994291305542


training:  31%|█████████████████████████████████████▌                                                                                  | 284/907 [03:21<07:23,  1.40it/s]

training loss: 2.8635435104370117
training loss: 2.879427909851074
training loss: 2.927006959915161
training loss: 2.9143965244293213
training loss: 2.8629794120788574
training loss: 2.9424595832824707
validation loss: 3.1189725399017334
training loss: 2.8926732540130615
training loss: 2.904649257659912
training loss: 2.8854870796203613
training loss: 2.9263367652893066
training loss: 2.887950897216797
training loss: 2.871612548828125
training loss: 2.8202757835388184
training loss: 2.9166181087493896
training loss: 2.867931604385376


training:  33%|███████████████████████████████████████▌                                                                                | 299/907 [03:32<07:12,  1.40it/s]

training loss: 2.899003744125366
training loss: 2.885122537612915
training loss: 2.8944544792175293
validation loss: 2.882511615753174
training loss: 2.9024410247802734
training loss: 2.887482166290283
training loss: 2.8932414054870605
training loss: 2.899559259414673
training loss: 2.893742799758911
training loss: 2.903402805328369
training loss: 2.9067583084106445
training loss: 2.9064016342163086
training loss: 2.862212896347046
training loss: 2.91752552986145
training loss: 2.9046895503997803
training loss: 2.8732008934020996
validation loss: 2.8666746616363525


training:  35%|█████████████████████████████████████████▌                                                                              | 314/907 [03:43<07:04,  1.40it/s]

training loss: 2.9206643104553223
training loss: 2.9027812480926514
training loss: 2.9046592712402344
training loss: 2.8759102821350098
training loss: 2.8985259532928467
training loss: 2.8619163036346436
training loss: 2.903369188308716
training loss: 2.8931195735931396
training loss: 2.8821732997894287
training loss: 2.8968169689178467
training loss: 2.8629391193389893
training loss: 2.894066333770752
validation loss: 2.925659656524658
training loss: 2.9486117362976074
training loss: 2.9098267555236816


training:  36%|███████████████████████████████████████████▍                                                                            | 328/907 [03:53<06:54,  1.40it/s]

training loss: 2.8493716716766357
training loss: 2.8788816928863525
training loss: 2.9374947547912598
training loss: 2.9203085899353027
training loss: 2.8559930324554443
training loss: 2.841325521469116
training loss: 2.8592724800109863
training loss: 2.90826153755188
training loss: 2.906447649002075
training loss: 2.846872091293335
validation loss: 2.8516523838043213
training loss: 2.923656463623047
training loss: 2.8761801719665527
training loss: 2.89300799369812
training loss: 2.8884081840515137
training loss: 2.8446812629699707


training:  38%|█████████████████████████████████████████████▍                                                                          | 343/907 [04:04<06:43,  1.40it/s]

training loss: 2.8679628372192383
training loss: 2.8253231048583984
training loss: 2.9335455894470215
training loss: 2.870685338973999
training loss: 2.88525128364563
training loss: 2.8893942832946777
training loss: 2.9099068641662598
validation loss: 2.903292655944824
training loss: 2.8578732013702393
training loss: 2.8904218673706055
training loss: 2.8916001319885254
training loss: 2.8962090015411377
training loss: 2.8572208881378174
training loss: 2.8810808658599854
training loss: 2.8731484413146973
training loss: 2.852090835571289


training:  39%|███████████████████████████████████████████████▎                                                                        | 358/907 [04:14<06:31,  1.40it/s]

training loss: 2.88191294670105
training loss: 2.8998565673828125
training loss: 2.8469362258911133
training loss: 2.882985830307007
validation loss: 2.8857016563415527
training loss: 2.8743486404418945
training loss: 2.8863186836242676
training loss: 2.828277111053467
training loss: 2.8426003456115723
training loss: 2.93890643119812
training loss: 2.8729982376098633
training loss: 2.879232883453369
training loss: 2.9330739974975586
training loss: 2.8926806449890137
training loss: 2.8704545497894287
training loss: 2.947937488555908


training:  41%|█████████████████████████████████████████████████▎                                                                      | 373/907 [04:25<06:22,  1.40it/s]

training loss: 2.939458131790161
validation loss: 2.8606173992156982
training loss: 2.8730320930480957
training loss: 2.882730007171631
training loss: 2.898541212081909
training loss: 2.8760714530944824
training loss: 2.8724544048309326
training loss: 2.9163405895233154
training loss: 2.9196465015411377
training loss: 2.900960683822632
training loss: 2.8820295333862305
training loss: 2.898019313812256
training loss: 2.8493332862854004
training loss: 2.8791754245758057
validation loss: 2.8553519248962402
training loss: 2.8678104877471924
training loss: 2.863795042037964


training:  43%|███████████████████████████████████████████████████▎                                                                    | 388/907 [04:36<06:10,  1.40it/s]

training loss: 2.8619980812072754
training loss: 2.8884952068328857
training loss: 2.881248712539673
training loss: 2.8609049320220947
training loss: 2.918837308883667
training loss: 2.9769327640533447
training loss: 2.9144515991210938
training loss: 2.887699842453003
training loss: 2.863492012023926
training loss: 2.8389110565185547
validation loss: 2.893221855163574
training loss: 2.9146416187286377
training loss: 2.8881289958953857
training loss: 2.936007499694824
training loss: 2.834730386734009
training loss: 2.8562428951263428


training:  44%|█████████████████████████████████████████████████████▎                                                                  | 403/907 [04:46<06:00,  1.40it/s]

training loss: 2.8763129711151123
training loss: 2.8633527755737305
training loss: 2.950019598007202
training loss: 2.8870983123779297
training loss: 2.8622467517852783
training loss: 2.9655916690826416
training loss: 2.9253501892089844
validation loss: 2.9041247367858887
training loss: 2.8785457611083984
training loss: 2.922290802001953
training loss: 2.8902595043182373
training loss: 2.843750476837158
training loss: 2.898366689682007
training loss: 2.866194248199463
training loss: 2.828242301940918


training:  46%|███████████████████████████████████████████████████████▏                                                                | 417/907 [04:57<05:50,  1.40it/s]

training loss: 2.8592774868011475
training loss: 2.922085762023926
training loss: 2.8939640522003174
training loss: 2.8943021297454834
training loss: 2.881831407546997
validation loss: 2.9104461669921875
training loss: 2.8921265602111816
training loss: 2.892911672592163
training loss: 2.86501407623291
training loss: 2.8869409561157227
training loss: 2.899930715560913
training loss: 2.9180908203125
training loss: 2.890859603881836
training loss: 2.879241943359375
training loss: 2.899376630783081


training:  48%|█████████████████████████████████████████████████████████                                                               | 431/907 [05:07<05:40,  1.40it/s]

training loss: 2.873802900314331
training loss: 2.9074649810791016
training loss: 2.9070515632629395
validation loss: 2.941171884536743
training loss: 2.8852267265319824
training loss: 2.922919273376465
training loss: 2.8783113956451416
training loss: 2.913510799407959
training loss: 2.8985297679901123
training loss: 2.901616334915161
training loss: 2.8993630409240723
training loss: 2.853018045425415
training loss: 2.8334600925445557


training:  49%|██████████████████████████████████████████████████████████▌                                                             | 443/907 [05:20<06:25,  1.20it/s]

training loss: 2.888099431991577
training loss: 2.8697359561920166
training loss: 2.8644919395446777
validation loss: 2.837608575820923
training loss: 2.8762381076812744
training loss: 2.9379494190216064
training loss: 2.867926836013794
training loss: 2.829700231552124
training loss: 2.8678014278411865
training loss: 2.9027934074401855
training loss: 2.8833863735198975


training:  50%|███████████████████████████████████████████████████████████▉                                                            | 453/907 [05:34<07:39,  1.01s/it]

training loss: 2.859842300415039
training loss: 2.9150137901306152
training loss: 2.8972508907318115
training loss: 2.9290683269500732
training loss: 2.8708908557891846
validation loss: 2.9014275074005127
training loss: 2.8655385971069336
training loss: 2.8866751194000244


training:  51%|████████████████████████████████████████████████████████████▊                                                           | 460/907 [05:44<08:28,  1.14s/it]

training loss: 2.906290054321289
training loss: 2.8780617713928223
training loss: 2.92765736579895
training loss: 2.9094111919403076
training loss: 2.8650918006896973
training loss: 2.9022793769836426
training loss: 2.8945834636688232


training:  51%|█████████████████████████████████████████████████████████████▊                                                          | 467/907 [05:54<09:00,  1.23s/it]

training loss: 2.8694663047790527
training loss: 2.955317735671997
training loss: 2.893789291381836
validation loss: 2.908560276031494
training loss: 2.8427088260650635
training loss: 2.905506134033203
training loss: 2.87888765335083
training loss: 2.8372745513916016


training:  52%|██████████████████████████████████████████████████████████████▋                                                         | 474/907 [06:04<09:18,  1.29s/it]

training loss: 2.886212110519409
training loss: 2.8337011337280273
training loss: 2.860151529312134
training loss: 2.8902018070220947
training loss: 2.935415029525757
training loss: 2.894709587097168
training loss: 2.8473291397094727


training:  53%|███████████████████████████████████████████████████████████████▋                                                        | 481/907 [06:14<09:28,  1.33s/it]

training loss: 2.8966004848480225
validation loss: 2.8815758228302
training loss: 2.8521687984466553
training loss: 2.8726348876953125
training loss: 2.886723518371582
training loss: 2.870452642440796
training loss: 2.848097324371338
training loss: 2.8922722339630127
training loss: 2.8541455268859863


training:  54%|████████████████████████████████████████████████████████████████▋                                                       | 489/907 [06:26<09:27,  1.36s/it]

training loss: 2.913724899291992
training loss: 2.9049768447875977
training loss: 2.894733190536499
training loss: 2.884253740310669
training loss: 2.928612232208252
validation loss: 2.8349449634552
training loss: 2.865462303161621
training loss: 2.9305596351623535
training loss: 2.870594024658203


training:  55%|█████████████████████████████████████████████████████████████████▊                                                      | 497/907 [06:37<09:25,  1.38s/it]

training loss: 2.8595616817474365
training loss: 2.8826844692230225
training loss: 2.8742454051971436
training loss: 2.841573715209961
training loss: 2.886903762817383
training loss: 2.9073781967163086
training loss: 2.864824056625366
training loss: 2.9775915145874023


training:  56%|██████████████████████████████████████████████████████████████████▊                                                     | 505/907 [06:48<09:20,  1.39s/it]

training loss: 2.8797926902770996
validation loss: 2.91058611869812
training loss: 2.916109800338745
training loss: 2.856635808944702
training loss: 2.8822944164276123
training loss: 2.8973731994628906
training loss: 2.8802409172058105
training loss: 2.867485523223877
training loss: 2.914639472961426


training:  57%|███████████████████████████████████████████████████████████████████▊                                                    | 513/907 [07:00<09:10,  1.40s/it]

training loss: 2.8681252002716064
training loss: 2.8633530139923096
training loss: 2.9154765605926514
training loss: 2.9035584926605225
training loss: 2.896070957183838
validation loss: 2.8496828079223633
training loss: 2.8940327167510986
training loss: 2.9417240619659424
training loss: 2.881165027618408


training:  57%|████████████████████████████████████████████████████████████████████▉                                                   | 521/907 [07:11<09:02,  1.40s/it]

training loss: 2.9079833030700684
training loss: 2.910642147064209
training loss: 2.869541645050049
training loss: 2.894859552383423
training loss: 2.863434076309204
training loss: 2.875439405441284
training loss: 2.893310546875
training loss: 2.935349464416504


training:  58%|█████████████████████████████████████████████████████████████████████▉                                                  | 529/907 [07:23<08:53,  1.41s/it]

training loss: 2.9004969596862793
validation loss: 2.904477119445801
training loss: 2.909510612487793
training loss: 2.866393566131592
training loss: 2.8754844665527344
training loss: 2.8771259784698486
training loss: 2.877845287322998
training loss: 2.8810863494873047
training loss: 2.879265785217285


training:  59%|███████████████████████████████████████████████████████████████████████                                                 | 537/907 [07:34<08:42,  1.41s/it]

training loss: 2.8794493675231934
training loss: 2.9156980514526367
training loss: 2.855027675628662
training loss: 2.8280375003814697
training loss: 2.8442139625549316
validation loss: 2.8172860145568848
training loss: 2.8985655307769775
training loss: 2.865985631942749
training loss: 2.9219586849212646


training:  60%|████████████████████████████████████████████████████████████████████████                                                | 545/907 [07:45<08:33,  1.42s/it]

training loss: 2.8586032390594482
training loss: 2.9780290126800537
training loss: 2.8894765377044678
training loss: 2.8449156284332275
training loss: 2.8561551570892334
training loss: 2.877192735671997
training loss: 2.846339702606201
training loss: 2.885845422744751


training:  61%|█████████████████████████████████████████████████████████████████████████▏                                              | 553/907 [07:57<08:23,  1.42s/it]

training loss: 2.857114791870117
validation loss: 2.919344902038574
training loss: 2.8772053718566895
training loss: 2.9344594478607178
training loss: 2.895333766937256
training loss: 2.8778316974639893
training loss: 2.8885018825531006
training loss: 2.860888719558716
training loss: 2.874319076538086


training:  62%|██████████████████████████████████████████████████████████████████████████▏                                             | 561/907 [08:08<08:10,  1.42s/it]

training loss: 2.904155731201172
training loss: 2.915796995162964
training loss: 2.8965001106262207
training loss: 2.867123603820801
training loss: 2.8972222805023193
validation loss: 2.889747142791748
training loss: 2.897585153579712
training loss: 2.8586695194244385
training loss: 2.905078887939453


training:  63%|███████████████████████████████████████████████████████████████████████████▎                                            | 569/907 [08:19<07:55,  1.41s/it]

training loss: 2.909269332885742
training loss: 2.9237892627716064
training loss: 2.881396770477295
training loss: 2.896829128265381
training loss: 2.9198131561279297
training loss: 2.889881134033203
training loss: 2.8903181552886963
training loss: 2.9211833477020264


training:  64%|████████████████████████████████████████████████████████████████████████████▎                                           | 577/907 [08:30<07:46,  1.41s/it]

training loss: 2.925950765609741
validation loss: 2.8601982593536377
training loss: 2.8271372318267822
training loss: 2.8868582248687744
training loss: 2.878509759902954
training loss: 2.8748512268066406
training loss: 2.8717520236968994
training loss: 2.8677005767822266
training loss: 2.8868649005889893


training:  64%|█████████████████████████████████████████████████████████████████████████████▍                                          | 585/907 [08:42<07:35,  1.41s/it]

training loss: 2.911898374557495
training loss: 2.9076943397521973
training loss: 3.0072808265686035
training loss: 2.8443589210510254
training loss: 2.898721694946289
validation loss: 2.9003772735595703
training loss: 2.852344036102295
training loss: 2.953700542449951
training loss: 2.9277658462524414


training:  65%|██████████████████████████████████████████████████████████████████████████████▍                                         | 593/907 [08:53<07:25,  1.42s/it]

training loss: 2.9611780643463135
training loss: 2.9051101207733154
training loss: 2.888213872909546
training loss: 2.9638779163360596
training loss: 2.868920087814331
training loss: 2.8968615531921387
training loss: 2.9065449237823486
training loss: 2.9258761405944824


training:  66%|███████████████████████████████████████████████████████████████████████████████▌                                        | 601/907 [09:05<07:13,  1.42s/it]

training loss: 2.8949685096740723
validation loss: 2.937741994857788
training loss: 2.8994829654693604
training loss: 2.877948045730591
training loss: 2.8675734996795654
training loss: 2.879009485244751
training loss: 2.8974597454071045
training loss: 2.900686264038086
training loss: 2.8614771366119385


training:  67%|████████████████████████████████████████████████████████████████████████████████▌                                       | 609/907 [09:16<07:02,  1.42s/it]

training loss: 2.8666274547576904
training loss: 2.892735719680786
training loss: 2.891897201538086
training loss: 2.9194934368133545
training loss: 2.8980612754821777
validation loss: 2.8912527561187744
training loss: 2.9044172763824463
training loss: 2.8643836975097656
training loss: 2.884199380874634


training:  68%|█████████████████████████████████████████████████████████████████████████████████▋                                      | 617/907 [09:27<06:51,  1.42s/it]

training loss: 2.9334065914154053
training loss: 2.879688262939453
training loss: 2.8530783653259277
training loss: 2.920201301574707
training loss: 2.8753323554992676
training loss: 2.8945467472076416
training loss: 2.884111166000366
training loss: 2.8699567317962646


training:  69%|██████████████████████████████████████████████████████████████████████████████████▋                                     | 625/907 [09:39<06:40,  1.42s/it]

training loss: 2.9161691665649414
validation loss: 2.9095242023468018
training loss: 2.8906478881835938
training loss: 2.895972728729248
training loss: 2.85158109664917
training loss: 2.8822388648986816
training loss: 2.9088327884674072
training loss: 2.8631057739257812
training loss: 2.8869166374206543


training:  70%|███████████████████████████████████████████████████████████████████████████████████▋                                    | 633/907 [09:50<06:28,  1.42s/it]

training loss: 2.928717851638794
training loss: 2.867173910140991
training loss: 2.861463785171509
training loss: 2.862969398498535
training loss: 2.856543779373169
validation loss: 2.9598922729492188
training loss: 2.87383770942688
training loss: 2.927885055541992
training loss: 2.869133949279785


training:  71%|████████████████████████████████████████████████████████████████████████████████████▊                                   | 641/907 [10:01<06:17,  1.42s/it]

training loss: 2.9042840003967285
training loss: 2.8739590644836426
training loss: 2.886882781982422
training loss: 2.8772940635681152
training loss: 2.8903470039367676
training loss: 2.874464273452759
training loss: 2.9371395111083984
training loss: 2.886263370513916


training:  72%|█████████████████████████████████████████████████████████████████████████████████████▊                                  | 649/907 [10:13<06:06,  1.42s/it]

training loss: 2.867966651916504
validation loss: 2.8801701068878174
training loss: 2.917297601699829
training loss: 2.885312080383301
training loss: 2.869889736175537
training loss: 2.9001424312591553
training loss: 2.8987948894500732
training loss: 2.8637092113494873
training loss: 2.87526798248291


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▉                                 | 657/907 [10:24<05:55,  1.42s/it]

training loss: 2.879696846008301
training loss: 2.866716146469116
training loss: 2.8657031059265137
training loss: 2.8817012310028076
training loss: 2.875596046447754
validation loss: 2.8782918453216553
training loss: 2.957442045211792
training loss: 2.8548197746276855
training loss: 2.9169199466705322


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▉                                | 665/907 [10:36<05:44,  1.42s/it]

training loss: 2.8495850563049316
training loss: 2.8721201419830322
training loss: 2.8864219188690186
training loss: 2.882678508758545
training loss: 2.8783884048461914
training loss: 2.9049341678619385
training loss: 2.8589344024658203
training loss: 2.9078962802886963


training:  74%|█████████████████████████████████████████████████████████████████████████████████████████                               | 673/907 [10:47<05:33,  1.43s/it]

training loss: 2.8821277618408203
validation loss: 2.9352660179138184
training loss: 2.9105780124664307
training loss: 2.897249221801758
training loss: 2.8620898723602295
training loss: 2.9068002700805664
training loss: 2.9400672912597656
training loss: 2.865104913711548
training loss: 2.8463640213012695


training:  75%|██████████████████████████████████████████████████████████████████████████████████████████                              | 681/907 [10:58<05:21,  1.42s/it]

training loss: 2.8687076568603516
training loss: 2.897108793258667
training loss: 2.910233497619629
training loss: 2.8994762897491455
training loss: 2.91388201713562
validation loss: 2.9290452003479004
training loss: 2.90411114692688
training loss: 2.8967463970184326
training loss: 2.88864803314209


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████▏                            | 689/907 [11:10<05:11,  1.43s/it]

training loss: 2.854130268096924
training loss: 2.8515055179595947
training loss: 2.8929011821746826
training loss: 2.906543016433716
training loss: 2.9032905101776123
training loss: 2.842921257019043
training loss: 2.9009673595428467
training loss: 2.913257122039795


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▏                           | 697/907 [11:21<04:59,  1.43s/it]

training loss: 2.8892951011657715
validation loss: 2.8635809421539307
training loss: 2.8724725246429443
training loss: 2.9077372550964355
training loss: 2.859483242034912
training loss: 2.8594014644622803
training loss: 2.934286117553711
training loss: 2.88210391998291
training loss: 2.882301092147827


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▎                          | 705/907 [11:33<04:47,  1.42s/it]

training loss: 2.8809971809387207
training loss: 2.8533823490142822
training loss: 2.8826088905334473
training loss: 2.891787528991699
training loss: 2.9029150009155273
validation loss: 2.8573904037475586
training loss: 2.9235148429870605
training loss: 2.9123337268829346
training loss: 2.865949869155884


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▎                         | 713/907 [11:44<04:36,  1.43s/it]

training loss: 2.8647055625915527
training loss: 2.9111647605895996
training loss: 2.9159204959869385
training loss: 2.846541166305542
training loss: 2.934964418411255
training loss: 2.909147024154663
training loss: 2.858555316925049
training loss: 2.867530107498169


training:  79%|███████████████████████████████████████████████████████████████████████████████████████████████▍                        | 721/907 [11:55<04:25,  1.43s/it]

training loss: 2.8587002754211426
validation loss: 2.892489194869995
training loss: 2.883711576461792
training loss: 2.8677356243133545
training loss: 2.882431983947754
training loss: 2.8620855808258057
training loss: 2.8506555557250977
training loss: 2.8276588916778564
training loss: 2.92240047454834


training:  80%|████████████████████████████████████████████████████████████████████████████████████████████████▍                       | 729/907 [12:07<04:13,  1.42s/it]

training loss: 2.8463072776794434
training loss: 2.895062208175659
training loss: 2.9012773036956787
training loss: 2.890441417694092
training loss: 2.854602336883545
validation loss: 2.895369529724121
training loss: 2.889906167984009
training loss: 2.8664565086364746
training loss: 2.8976852893829346


training:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████▌                      | 737/907 [12:18<04:01,  1.42s/it]

training loss: 2.8503506183624268
training loss: 2.869215726852417
training loss: 2.8729212284088135
training loss: 2.836822032928467
training loss: 2.940066337585449
training loss: 2.8580214977264404
training loss: 2.894458055496216
training loss: 2.9227006435394287


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▌                     | 745/907 [12:29<03:50,  1.42s/it]

training loss: 2.886309862136841
validation loss: 2.850659132003784
training loss: 2.8958401679992676
training loss: 2.886641025543213
training loss: 2.9329440593719482
training loss: 2.881673574447632
training loss: 2.8699588775634766
training loss: 2.8952879905700684
training loss: 2.8597114086151123


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▋                    | 753/907 [12:41<03:38,  1.42s/it]

training loss: 2.9270918369293213
training loss: 2.9191360473632812
training loss: 2.894232988357544
training loss: 2.853809118270874
training loss: 2.879770517349243
validation loss: 2.886535406112671
training loss: 2.8255321979522705
training loss: 2.8963186740875244
training loss: 2.909794569015503


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▋                   | 761/907 [12:52<03:27,  1.42s/it]

training loss: 2.8733670711517334
training loss: 2.8995275497436523
training loss: 2.857203960418701
training loss: 2.871696710586548
training loss: 2.891808032989502
training loss: 2.8963465690612793
training loss: 2.934297800064087
training loss: 2.8975887298583984


training:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋                  | 769/907 [13:04<03:16,  1.42s/it]

training loss: 2.864299774169922
validation loss: 2.8721604347229004
training loss: 2.9109485149383545
training loss: 2.8609774112701416
training loss: 2.90551495552063
training loss: 2.8704748153686523
training loss: 2.8760688304901123
training loss: 2.89449143409729
training loss: 2.9004199504852295


training:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                 | 777/907 [13:15<03:04,  1.42s/it]

training loss: 2.8983800411224365
training loss: 2.923892021179199
training loss: 2.8756837844848633
training loss: 2.8765807151794434
training loss: 2.873255968093872
validation loss: 2.8826184272766113
training loss: 2.8857102394104004
training loss: 2.9056754112243652
training loss: 2.862231969833374


training:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████▊                | 785/907 [13:26<02:53,  1.43s/it]

training loss: 2.8999929428100586
training loss: 2.9059946537017822
training loss: 2.898919105529785
training loss: 2.89943790435791
training loss: 2.9290618896484375
training loss: 2.872737169265747
training loss: 2.9174141883850098
training loss: 2.8917040824890137


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▉               | 793/907 [13:38<02:42,  1.43s/it]

training loss: 2.8124351501464844
validation loss: 2.855029582977295
training loss: 2.883493423461914
training loss: 2.887566566467285
training loss: 2.8878250122070312
training loss: 2.8839170932769775
training loss: 2.8727970123291016
training loss: 2.8807361125946045
training loss: 2.8698630332946777


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉              | 801/907 [13:49<02:31,  1.43s/it]

training loss: 2.9322311878204346
training loss: 2.8916218280792236
training loss: 2.9008233547210693
training loss: 2.8850371837615967
training loss: 2.887787342071533
validation loss: 2.878551721572876
training loss: 2.870495319366455
training loss: 2.9081249237060547
training loss: 2.8115105628967285


training:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████             | 809/907 [14:01<02:19,  1.43s/it]

training loss: 2.8508143424987793
training loss: 2.903968572616577
training loss: 2.8926808834075928
training loss: 2.8296432495117188
training loss: 2.8705966472625732
training loss: 2.849036931991577
training loss: 2.8335375785827637
training loss: 2.874225616455078


training:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 817/907 [14:12<02:08,  1.43s/it]

training loss: 2.894630193710327
validation loss: 2.8722407817840576
training loss: 2.8523621559143066
training loss: 2.8908188343048096
training loss: 2.856584310531616
training loss: 2.9131155014038086
training loss: 2.900956392288208
training loss: 2.9024040699005127
training loss: 2.8852622509002686


training:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▏          | 825/907 [14:23<01:56,  1.42s/it]

training loss: 2.8630409240722656
training loss: 2.8735814094543457
training loss: 2.8765804767608643
training loss: 2.8811569213867188
training loss: 2.8927628993988037
validation loss: 2.892240285873413
training loss: 2.897454261779785
training loss: 2.8949098587036133
training loss: 2.879985809326172


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▏         | 833/907 [14:35<01:45,  1.43s/it]

training loss: 2.860429286956787
training loss: 2.8671152591705322
training loss: 2.8944520950317383
training loss: 2.8609731197357178
training loss: 2.8514564037323
training loss: 2.897656202316284
training loss: 2.8732287883758545
training loss: 2.8677361011505127


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▎        | 841/907 [14:46<01:34,  1.43s/it]

training loss: 2.8662586212158203
validation loss: 2.8496453762054443
training loss: 2.8782994747161865
training loss: 2.88474702835083
training loss: 2.890672206878662
training loss: 2.911759614944458
training loss: 2.8459503650665283
training loss: 2.925060272216797
training loss: 2.888915777206421


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎       | 849/907 [14:58<01:22,  1.42s/it]

training loss: 2.8747012615203857
training loss: 2.944099187850952
training loss: 2.859532356262207
training loss: 2.8841278553009033
training loss: 2.8865621089935303
validation loss: 2.908576488494873
training loss: 2.874101400375366
training loss: 2.9116430282592773
training loss: 2.8801138401031494


training:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍      | 857/907 [15:09<01:11,  1.42s/it]

training loss: 2.8598811626434326
training loss: 2.868542194366455
training loss: 2.8946919441223145
training loss: 2.9282336235046387
training loss: 2.8549234867095947
training loss: 2.90287184715271
training loss: 2.938615083694458
training loss: 2.9138715267181396


training:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍     | 865/907 [15:20<00:59,  1.42s/it]

training loss: 2.85440731048584
validation loss: 2.8745534420013428
training loss: 2.908377170562744
training loss: 2.865644693374634
training loss: 2.9601328372955322
training loss: 2.866702079772949
training loss: 2.864192008972168
training loss: 2.8673243522644043
training loss: 2.9060862064361572


training:  96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌    | 873/907 [15:32<00:48,  1.42s/it]

training loss: 2.862865447998047
training loss: 2.8806445598602295
training loss: 2.918149948120117
training loss: 2.919793128967285
training loss: 2.8633084297180176
validation loss: 2.9306223392486572
training loss: 3.1347954273223877
training loss: 2.8890883922576904
training loss: 2.949066162109375


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌   | 881/907 [15:43<00:36,  1.42s/it]

training loss: 2.8993678092956543
training loss: 2.928187847137451
training loss: 2.884946823120117
training loss: 2.879153251647949
training loss: 2.8261945247650146
training loss: 2.9255125522613525
training loss: 2.8730690479278564
training loss: 2.898069143295288


training:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌  | 889/907 [15:54<00:25,  1.42s/it]

training loss: 2.8885252475738525
validation loss: 2.863115072250366
training loss: 2.889711618423462
training loss: 2.8997814655303955
training loss: 2.8904166221618652
training loss: 2.8914852142333984
training loss: 2.8988687992095947
training loss: 2.898378849029541
training loss: 2.900296211242676


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 897/907 [16:06<00:14,  1.42s/it]

training loss: 2.9079079627990723
training loss: 2.9044249057769775
training loss: 2.8608219623565674
training loss: 2.9214067459106445
training loss: 2.8985934257507324
validation loss: 2.8885045051574707
training loss: 2.8617825508117676
training loss: 2.9280471801757812
training loss: 2.89799165725708


training: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 905/907 [16:17<00:02,  1.42s/it]

training loss: 2.902768135070801
training loss: 2.871497869491577


training: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 907/907 [16:20<00:00,  1.08s/it]
training:   0%|                                                                                                                                  | 0/907 [00:00<?, ?it/s]

training loss: 2.8952980041503906
training loss: 2.861273765563965
validation loss: 2.880032539367676
training loss: 2.9051201343536377
training loss: 2.889707565307617
training loss: 2.8821828365325928
training loss: 2.891268014907837
training loss: 2.871211051940918


training:   1%|▉                                                                                                                         | 7/907 [00:10<21:26,  1.43s/it]

training loss: 2.9305663108825684
training loss: 2.9485344886779785
training loss: 2.913431406021118
training loss: 2.851163148880005
training loss: 2.8815979957580566
training loss: 2.9432930946350098
training loss: 2.92161226272583
validation loss: 2.9129841327667236


training:   2%|█▊                                                                                                                       | 14/907 [00:20<21:17,  1.43s/it]

training loss: 2.8842954635620117
training loss: 2.8988115787506104
training loss: 2.879385471343994
training loss: 2.8644516468048096
training loss: 2.9395503997802734
training loss: 2.8557960987091064
training loss: 2.9210875034332275
training loss: 2.878077507019043


training:   2%|██▉                                                                                                                      | 22/907 [00:31<20:59,  1.42s/it]

training loss: 2.8915421962738037
training loss: 2.8911211490631104
training loss: 2.8411710262298584
training loss: 2.8679919242858887
validation loss: 2.888216972351074
training loss: 2.9126951694488525
training loss: 2.9142136573791504
training loss: 2.884428024291992
training loss: 2.8791122436523438


training:   3%|████                                                                                                                     | 30/907 [00:42<20:49,  1.42s/it]

training loss: 2.8378539085388184
training loss: 2.903684616088867
training loss: 2.855862617492676
training loss: 2.894280195236206
training loss: 2.8934214115142822
training loss: 2.896907329559326
training loss: 2.8545761108398438
training loss: 2.8820903301239014
validation loss: 2.8373217582702637


training:   4%|█████                                                                                                                    | 38/907 [00:54<20:37,  1.42s/it]

training loss: 2.878100633621216
training loss: 2.871957778930664
training loss: 2.9233386516571045
training loss: 2.8921589851379395
training loss: 2.8598711490631104
training loss: 2.885035753250122
training loss: 2.874680995941162
training loss: 2.8854384422302246


training:   5%|██████▏                                                                                                                  | 46/907 [01:05<20:21,  1.42s/it]

training loss: 2.8259425163269043
training loss: 2.844987392425537
training loss: 2.946274518966675
training loss: 2.8770806789398193
validation loss: 2.879162311553955
training loss: 2.8428118228912354
training loss: 2.8365213871002197
training loss: 2.8623299598693848
training loss: 2.830535411834717


training:   6%|███████▏                                                                                                                 | 54/907 [01:16<20:13,  1.42s/it]

training loss: 2.935356616973877
training loss: 2.8568453788757324
training loss: 2.872485876083374
training loss: 2.881654977798462
training loss: 2.9008138179779053
training loss: 2.876132011413574
training loss: 2.8688786029815674


training:   7%|████████▏                                                                                                                | 61/907 [01:26<20:08,  1.43s/it]

training loss: 2.913670539855957
validation loss: 2.874713897705078
training loss: 2.8865950107574463
training loss: 2.9178953170776367
training loss: 2.895705461502075
training loss: 2.8797693252563477
training loss: 2.893622398376465
training loss: 2.8560235500335693
training loss: 2.8702199459075928


training:   8%|█████████▏                                                                                                               | 69/907 [01:38<19:52,  1.42s/it]

training loss: 2.8610332012176514
training loss: 2.858752727508545
training loss: 2.8858234882354736
training loss: 2.8761632442474365
training loss: 2.8589112758636475
validation loss: 2.9128670692443848
training loss: 2.896362066268921
training loss: 2.925602912902832
training loss: 2.9241623878479004


training:   8%|██████████▎                                                                                                              | 77/907 [01:49<19:43,  1.43s/it]

training loss: 2.9074833393096924
training loss: 2.899146318435669
training loss: 2.8928050994873047
training loss: 2.916492223739624
training loss: 2.893929958343506
training loss: 2.936260223388672
training loss: 2.840116262435913
training loss: 2.842550754547119


training:   9%|███████████▎                                                                                                             | 85/907 [02:01<19:33,  1.43s/it]

training loss: 2.873414993286133
validation loss: 2.9467415809631348
training loss: 2.911184310913086
training loss: 2.8625524044036865
training loss: 2.9140114784240723
training loss: 2.8531768321990967
training loss: 2.8522236347198486
training loss: 2.907712697982788
training loss: 2.87666392326355


training:  10%|████████████▍                                                                                                            | 93/907 [02:12<19:17,  1.42s/it]

training loss: 2.9231116771698
training loss: 2.887672185897827
training loss: 2.844477415084839
training loss: 2.8985726833343506
training loss: 2.8702073097229004
validation loss: 2.9130160808563232
training loss: 2.8933703899383545
training loss: 2.878951072692871
training loss: 2.904475688934326


training:  11%|█████████████▎                                                                                                          | 101/907 [02:23<19:07,  1.42s/it]

training loss: 2.8755946159362793
training loss: 2.9211504459381104
training loss: 2.912447929382324
training loss: 2.8948256969451904
training loss: 2.895048141479492
training loss: 2.866565465927124
training loss: 2.885160446166992
training loss: 2.900377035140991


training:  12%|██████████████▍                                                                                                         | 109/907 [02:35<18:56,  1.42s/it]

training loss: 2.918545961380005
validation loss: 2.896430730819702
training loss: 2.921288251876831
training loss: 2.9188811779022217
training loss: 2.8966357707977295
training loss: 2.855208396911621
training loss: 2.96771240234375
training loss: 2.9413156509399414
training loss: 2.8878118991851807


training:  13%|███████████████▍                                                                                                        | 117/907 [02:46<18:40,  1.42s/it]

training loss: 2.9276070594787598
training loss: 2.8791370391845703
training loss: 2.912163019180298
training loss: 2.8961493968963623
training loss: 2.8986079692840576
validation loss: 2.8999176025390625
training loss: 2.880978584289551
training loss: 2.8981151580810547
training loss: 2.889927625656128


training:  14%|████████████████▌                                                                                                       | 125/907 [02:57<18:31,  1.42s/it]

training loss: 2.8621485233306885
training loss: 2.883349895477295
training loss: 2.8403356075286865
training loss: 2.8738510608673096
training loss: 2.932723045349121
training loss: 2.863521099090576
training loss: 2.830024480819702
training loss: 2.8644931316375732


training:  15%|█████████████████▌                                                                                                      | 133/907 [03:09<18:22,  1.42s/it]

training loss: 2.903153419494629
validation loss: 2.9584248065948486
training loss: 2.843280076980591
training loss: 2.8405559062957764
training loss: 2.879532814025879
training loss: 2.8592700958251953
training loss: 2.9241175651550293
training loss: 2.9034736156463623
training loss: 2.8678741455078125


training:  16%|██████████████████▋                                                                                                     | 141/907 [03:20<18:09,  1.42s/it]

training loss: 2.88765287399292
training loss: 2.906639814376831
training loss: 2.8797099590301514
training loss: 2.9266021251678467
training loss: 2.9074811935424805
validation loss: 2.8653595447540283
training loss: 2.892544746398926
training loss: 2.877080202102661
training loss: 2.9037692546844482


training:  16%|███████████████████▋                                                                                                    | 149/907 [03:32<18:02,  1.43s/it]

training loss: 2.865330934524536
training loss: 2.8656370639801025
training loss: 2.905395030975342
training loss: 2.840789318084717
training loss: 2.9044673442840576
training loss: 2.875267267227173
training loss: 2.835106372833252
training loss: 2.886469602584839


training:  17%|████████████████████▊                                                                                                   | 157/907 [03:43<17:52,  1.43s/it]

training loss: 2.833436965942383
validation loss: 2.87357497215271
training loss: 2.875361442565918
training loss: 2.9060251712799072
training loss: 2.917022705078125
training loss: 2.909973382949829
training loss: 2.872145891189575
training loss: 2.8816752433776855
training loss: 2.8468246459960938


training:  18%|█████████████████████▊                                                                                                  | 165/907 [03:55<17:41,  1.43s/it]

training loss: 2.873028516769409
training loss: 2.883302927017212
training loss: 2.8725318908691406
training loss: 2.8482789993286133
training loss: 2.8921732902526855
validation loss: 2.8983967304229736
training loss: 2.903632402420044
training loss: 2.8640029430389404
training loss: 2.9414782524108887


training:  19%|██████████████████████▉                                                                                                 | 173/907 [04:06<17:31,  1.43s/it]

training loss: 2.903041362762451
training loss: 2.869419813156128
training loss: 2.82126784324646
training loss: 2.8643224239349365
training loss: 2.9289488792419434
training loss: 2.872170925140381
training loss: 2.8569653034210205
training loss: 2.877833366394043


training:  20%|███████████████████████▉                                                                                                | 181/907 [04:18<17:21,  1.43s/it]

training loss: 2.8650779724121094
validation loss: 2.8750836849212646
training loss: 2.875683546066284
training loss: 2.8536500930786133
training loss: 2.89481782913208
training loss: 2.875840663909912
training loss: 2.859309196472168
training loss: 2.9085097312927246


training:  21%|████████████████████████▊                                                                                               | 188/907 [04:28<17:11,  1.43s/it]

training loss: 2.9131343364715576
training loss: 2.8578391075134277
training loss: 2.879739284515381
training loss: 2.8925845623016357
training loss: 2.879084587097168
training loss: 2.869046688079834
validation loss: 2.895634889602661
training loss: 2.947895050048828


training:  21%|█████████████████████████▊                                                                                              | 195/907 [04:38<17:02,  1.44s/it]

training loss: 2.9079320430755615
training loss: 2.8991687297821045
training loss: 2.8486616611480713
training loss: 2.8775360584259033
training loss: 2.846259832382202
training loss: 2.892186403274536
training loss: 2.9392056465148926
training loss: 2.8796699047088623


training:  22%|██████████████████████████▊                                                                                             | 203/907 [04:49<16:48,  1.43s/it]

training loss: 2.9034719467163086
training loss: 2.9126501083374023
training loss: 2.872633695602417
validation loss: 2.878183126449585
training loss: 2.885760545730591
training loss: 2.9028570652008057
training loss: 2.883174419403076
training loss: 2.898676872253418
training loss: 2.8972036838531494


training:  23%|███████████████████████████▉                                                                                            | 211/907 [05:01<16:38,  1.43s/it]

training loss: 2.9031808376312256
training loss: 2.907837390899658
training loss: 2.8668413162231445
training loss: 2.873385429382324
training loss: 2.8775031566619873
training loss: 2.8771960735321045
training loss: 2.8803651332855225
validation loss: 2.902085304260254


training:  24%|████████████████████████████▊                                                                                           | 218/907 [05:11<16:31,  1.44s/it]

training loss: 2.8499951362609863
training loss: 2.9133574962615967
training loss: 2.8616769313812256
training loss: 2.8860726356506348
training loss: 2.8906643390655518
training loss: 2.8194401264190674
training loss: 2.9014928340911865
training loss: 2.8673341274261475


training:  25%|█████████████████████████████▉                                                                                          | 226/907 [05:22<16:16,  1.43s/it]

training loss: 2.921356439590454
training loss: 2.859435796737671
training loss: 2.983999729156494
training loss: 2.8890559673309326
validation loss: 2.881856679916382
training loss: 2.8313703536987305
training loss: 2.898084878921509
training loss: 2.919480800628662
training loss: 2.9154930114746094


training:  26%|██████████████████████████████▉                                                                                         | 234/907 [05:34<16:08,  1.44s/it]

training loss: 2.865647077560425
training loss: 2.9157636165618896
training loss: 2.8750932216644287
training loss: 2.9350807666778564
training loss: 2.8946151733398438
training loss: 2.879779815673828
training loss: 2.8876020908355713


training:  27%|███████████████████████████████▉                                                                                        | 241/907 [05:44<16:01,  1.44s/it]

training loss: 2.8614003658294678
validation loss: 2.9431040287017822
training loss: 2.862210273742676
training loss: 2.9297678470611572
training loss: 2.9068472385406494
training loss: 2.8411245346069336
training loss: 2.926851511001587
training loss: 2.8815462589263916
training loss: 2.902291774749756


training:  27%|████████████████████████████████▉                                                                                       | 249/907 [05:55<15:41,  1.43s/it]

training loss: 2.8588333129882812
training loss: 2.9101617336273193
training loss: 2.9063003063201904
training loss: 2.919365882873535
training loss: 2.884049415588379
validation loss: 2.909252882003784
training loss: 2.883924722671509
training loss: 2.843254566192627
training loss: 2.8481287956237793


training:  28%|██████████████████████████████████                                                                                      | 257/907 [06:07<15:32,  1.43s/it]

training loss: 2.8827850818634033
training loss: 2.902318000793457
training loss: 2.860529661178589
training loss: 2.8294661045074463
training loss: 2.8936376571655273
training loss: 2.8855977058410645
training loss: 2.874683141708374
training loss: 2.8734779357910156


training:  29%|███████████████████████████████████                                                                                     | 265/907 [06:18<15:21,  1.43s/it]

training loss: 2.8574752807617188
validation loss: 2.940335512161255
training loss: 2.8858132362365723
training loss: 2.95831298828125
training loss: 2.891378879547119
training loss: 2.882627487182617
training loss: 2.91611909866333
training loss: 2.944549560546875
training loss: 2.8892738819122314


training:  30%|████████████████████████████████████                                                                                    | 273/907 [06:30<15:07,  1.43s/it]

training loss: 2.892242908477783
training loss: 2.904505968093872
training loss: 2.981931686401367
training loss: 2.915104389190674
training loss: 2.886227607727051
validation loss: 2.8880937099456787
training loss: 2.884272336959839
training loss: 2.8718411922454834
training loss: 2.85361647605896


training:  31%|█████████████████████████████████████▏                                                                                  | 281/907 [06:41<14:57,  1.43s/it]

training loss: 2.918997287750244
training loss: 2.8766727447509766
training loss: 2.9466731548309326
training loss: 2.9041831493377686
training loss: 2.8887460231781006
training loss: 2.874725341796875
training loss: 2.872485876083374


training:  32%|██████████████████████████████████████                                                                                  | 288/907 [06:51<14:46,  1.43s/it]

training loss: 2.9073221683502197
training loss: 2.894573211669922
validation loss: 2.8721070289611816
training loss: 2.8984222412109375
training loss: 2.8701231479644775
training loss: 2.908804416656494
training loss: 2.887636423110962
training loss: 2.888972282409668


training:  33%|███████████████████████████████████████                                                                                 | 295/907 [07:01<14:37,  1.43s/it]

training loss: 2.8949739933013916
training loss: 2.9057109355926514
training loss: 2.861649990081787
training loss: 2.885796546936035
training loss: 2.9312381744384766
training loss: 2.875505208969116
training loss: 2.850982904434204
validation loss: 2.8972134590148926


training:  33%|███████████████████████████████████████▉                                                                                | 302/907 [07:11<14:27,  1.43s/it]

training loss: 2.8969268798828125
training loss: 2.8682942390441895
training loss: 2.8823418617248535
training loss: 2.911597490310669
training loss: 2.8987812995910645
training loss: 2.9130241870880127
training loss: 2.8939502239227295


training:  34%|████████████████████████████████████████▉                                                                               | 309/907 [07:21<14:17,  1.43s/it]

training loss: 2.894113302230835
training loss: 2.8466813564300537
training loss: 2.884216070175171
training loss: 2.904454231262207
training loss: 2.871717691421509
validation loss: 2.843291997909546
training loss: 2.8586366176605225
training loss: 2.904568910598755


training:  35%|█████████████████████████████████████████▊                                                                              | 316/907 [07:31<14:10,  1.44s/it]

training loss: 2.8608813285827637
training loss: 2.8814046382904053
training loss: 2.853019952774048
training loss: 2.9604721069335938
training loss: 2.8792731761932373
training loss: 2.92983341217041
training loss: 2.866562604904175
training loss: 2.9033589363098145


training:  36%|██████████████████████████████████████████▊                                                                             | 324/907 [07:43<13:55,  1.43s/it]

training loss: 2.8746962547302246
training loss: 2.886129140853882
validation loss: 2.9096462726593018
training loss: 2.861985206604004
training loss: 2.895564079284668
training loss: 2.8831708431243896
training loss: 2.913097381591797
training loss: 2.887641191482544
training loss: 2.877375364303589


training:  37%|███████████████████████████████████████████▉                                                                            | 332/907 [07:54<13:45,  1.44s/it]

training loss: 2.918227195739746
training loss: 2.887180805206299
training loss: 2.8718554973602295
training loss: 2.903297185897827
training loss: 2.898529291152954
training loss: 2.8601574897766113
validation loss: 2.8852591514587402
training loss: 2.8241100311279297


training:  37%|████████████████████████████████████████████▊                                                                           | 339/907 [08:04<13:37,  1.44s/it]

training loss: 2.8767824172973633
training loss: 2.9212517738342285
training loss: 2.879183530807495
training loss: 2.9256253242492676
training loss: 2.878797769546509
training loss: 2.953928232192993
training loss: 2.8562588691711426
training loss: 2.916126012802124


training:  38%|█████████████████████████████████████████████▉                                                                          | 347/907 [08:16<13:23,  1.43s/it]

training loss: 2.8443613052368164
training loss: 2.8731346130371094
training loss: 2.888155460357666
validation loss: 2.9360485076904297
training loss: 2.9057562351226807
training loss: 2.8709304332733154
training loss: 2.8664400577545166
training loss: 2.8805148601531982
training loss: 2.8661606311798096


training:  39%|██████████████████████████████████████████████▉                                                                         | 355/907 [08:27<13:11,  1.43s/it]

training loss: 2.938244581222534
training loss: 2.9175407886505127
training loss: 2.9027907848358154
training loss: 2.8666443824768066
training loss: 2.909543752670288
training loss: 2.9397802352905273
training loss: 2.862466335296631
validation loss: 2.8798258304595947


training:  40%|███████████████████████████████████████████████▉                                                                        | 362/907 [08:37<13:02,  1.44s/it]

training loss: 2.893099546432495
training loss: 2.848543405532837
training loss: 2.88694429397583
training loss: 2.893761157989502
training loss: 2.8822929859161377
training loss: 2.9288578033447266
training loss: 2.9058609008789062
training loss: 2.895589828491211


training:  41%|████████████████████████████████████████████████▉                                                                       | 370/907 [08:49<12:48,  1.43s/it]

training loss: 2.88429594039917
training loss: 2.8436195850372314
training loss: 2.8508212566375732
training loss: 2.895991086959839
validation loss: 2.837536096572876
training loss: 2.8741750717163086
training loss: 2.8936712741851807
training loss: 2.901174306869507
training loss: 2.8246004581451416


training:  42%|██████████████████████████████████████████████████                                                                      | 378/907 [09:00<12:38,  1.43s/it]

training loss: 2.8786778450012207
training loss: 2.8687610626220703
training loss: 2.871051549911499
training loss: 2.90734601020813
training loss: 2.8662805557250977
training loss: 2.858520746231079
training loss: 2.9378132820129395


training:  42%|██████████████████████████████████████████████████▉                                                                     | 385/907 [09:10<12:29,  1.44s/it]

training loss: 2.8790626525878906
validation loss: 2.92681884765625
training loss: 2.861964702606201
training loss: 2.902118444442749
training loss: 2.8701953887939453
training loss: 2.9252376556396484
training loss: 2.9246435165405273
training loss: 2.853363275527954
training loss: 2.922258138656616


training:  43%|███████████████████████████████████████████████████▉                                                                    | 393/907 [09:22<12:16,  1.43s/it]

training loss: 2.9104666709899902
training loss: 2.8686182498931885
training loss: 2.865060567855835
training loss: 2.9083175659179688
training loss: 2.9175069332122803
validation loss: 2.881948471069336
training loss: 2.92276668548584
training loss: 2.863384962081909
training loss: 2.8743441104888916


training:  44%|█████████████████████████████████████████████████████                                                                   | 401/907 [09:33<12:06,  1.43s/it]

training loss: 2.85782527923584
training loss: 2.853069305419922
training loss: 2.8920722007751465
training loss: 2.8794238567352295
training loss: 2.869417905807495
training loss: 2.8843677043914795
training loss: 2.8637917041778564
training loss: 2.8502449989318848
training loss: 2.824427366256714
validation loss: 2.9079384803771973
training loss: 2.907952070236206
training loss: 2.8481521606445312
training loss: 2.891928195953369
training loss: 2.9045872688293457


training:  46%|██████████████████████████████████████████████████████▊                                                                 | 414/907 [09:44<10:13,  1.25s/it]

training loss: 2.9176597595214844
training loss: 2.898050546646118
training loss: 2.886167287826538
training loss: 2.8713128566741943
training loss: 2.8986093997955322
training loss: 2.852421998977661
training loss: 2.86275315284729
training loss: 2.872190475463867
validation loss: 2.8661365509033203
training loss: 2.9044768810272217
training loss: 2.9256045818328857
training loss: 2.9345126152038574
training loss: 2.829150676727295
training loss: 2.885774612426758
training loss: 2.854261875152588
training loss: 2.9012956619262695


training:  47%|████████████████████████████████████████████████████████▊                                                               | 429/907 [09:54<08:38,  1.08s/it]

training loss: 2.8880093097686768
training loss: 2.9324119091033936
training loss: 2.8798277378082275
training loss: 2.8694822788238525
training loss: 2.8964285850524902
validation loss: 2.8686201572418213
training loss: 2.8915998935699463
training loss: 2.8362226486206055
training loss: 2.8605458736419678
training loss: 2.909567356109619
training loss: 2.9026083946228027
training loss: 2.8856418132781982
training loss: 2.8283944129943848
training loss: 2.8956408500671387
training loss: 2.910292148590088
training loss: 2.872804641723633


training:  49%|██████████████████████████████████████████████████████████▋                                                             | 444/907 [10:05<07:30,  1.03it/s]

training loss: 2.897538900375366
training loss: 2.8653390407562256
validation loss: 2.8948910236358643
training loss: 2.8614540100097656
training loss: 2.8668160438537598
training loss: 2.8768274784088135
training loss: 2.8526804447174072
training loss: 2.9176578521728516
training loss: 2.8702893257141113
training loss: 2.911862850189209
training loss: 2.8582279682159424
training loss: 2.905726194381714
training loss: 2.8703625202178955
training loss: 2.8775079250335693
training loss: 2.893143892288208
validation loss: 2.892366409301758
training loss: 2.887714147567749


training:  51%|████████████████████████████████████████████████████████████▋                                                           | 459/907 [10:16<06:40,  1.12it/s]

training loss: 2.8939645290374756
training loss: 2.8917837142944336
training loss: 2.8720781803131104
training loss: 2.8794147968292236
training loss: 2.880819320678711
training loss: 2.8837668895721436
training loss: 2.9062724113464355
training loss: 2.857802629470825
training loss: 2.899193525314331
training loss: 2.9051358699798584
training loss: 2.907254695892334
validation loss: 2.9267497062683105
training loss: 2.9036757946014404
training loss: 2.838775873184204
training loss: 2.898134469985962
training loss: 2.859379529953003


training:  52%|██████████████████████████████████████████████████████████████▋                                                         | 474/907 [10:26<06:03,  1.19it/s]

training loss: 2.8860411643981934
training loss: 2.854551315307617
training loss: 2.882607936859131
training loss: 2.898397922515869
training loss: 2.888826370239258
training loss: 2.8835980892181396
training loss: 2.870659589767456
training loss: 2.8841936588287354
validation loss: 2.8742544651031494
training loss: 2.917734146118164
training loss: 2.874622344970703
training loss: 2.8972418308258057
training loss: 2.8897547721862793
training loss: 2.8407835960388184
training loss: 2.8826117515563965
training loss: 2.8687756061553955


training:  54%|████████████████████████████████████████████████████████████████▋                                                       | 489/907 [10:37<05:35,  1.25it/s]

training loss: 2.9057188034057617
training loss: 2.806204080581665
training loss: 2.8509745597839355
training loss: 2.9066600799560547
training loss: 2.890974283218384
validation loss: 2.8983261585235596
training loss: 2.8766870498657227
training loss: 2.86763858795166
training loss: 2.840106248855591
training loss: 2.896496295928955
training loss: 2.8996622562408447
training loss: 2.87658953666687
training loss: 2.847461700439453
training loss: 2.8904569149017334
training loss: 2.854764223098755
training loss: 2.91385817527771


training:  56%|██████████████████████████████████████████████████████████████████▋                                                     | 504/907 [10:48<05:12,  1.29it/s]

training loss: 2.9021947383880615
training loss: 2.906304121017456
validation loss: 2.898467540740967
training loss: 2.908355236053467
training loss: 2.8469998836517334
training loss: 2.9000325202941895
training loss: 2.8965702056884766
training loss: 2.8811800479888916
training loss: 2.8928399085998535
training loss: 2.9035685062408447
training loss: 2.895292043685913
training loss: 2.881854772567749
training loss: 2.860330820083618
training loss: 2.8668527603149414
training loss: 2.894670009613037
validation loss: 2.8453454971313477
training loss: 2.861466407775879


training:  57%|████████████████████████████████████████████████████████████████████▋                                                   | 519/907 [10:58<04:53,  1.32it/s]

training loss: 2.8571789264678955
training loss: 2.8492050170898438
training loss: 2.925861358642578
training loss: 2.966688394546509
training loss: 2.850827693939209
training loss: 2.8728320598602295
training loss: 2.8806066513061523
training loss: 2.8875417709350586
training loss: 2.909874677658081
training loss: 2.848095178604126
training loss: 2.9260618686676025
validation loss: 2.8504092693328857
training loss: 2.848564624786377
training loss: 2.86569881439209
training loss: 2.8867924213409424
training loss: 2.885664701461792


training:  59%|██████████████████████████████████████████████████████████████████████▋                                                 | 534/907 [11:09<04:36,  1.35it/s]

training loss: 2.8301033973693848
training loss: 2.9134271144866943
training loss: 2.8725085258483887
training loss: 2.9151387214660645
training loss: 2.884723424911499
training loss: 2.8602616786956787
training loss: 2.8693933486938477
training loss: 2.896054267883301
validation loss: 2.8956730365753174
training loss: 2.8812167644500732
training loss: 2.9286139011383057
training loss: 2.9276139736175537
training loss: 2.873983860015869
training loss: 2.8794331550598145
training loss: 2.880566120147705
training loss: 2.9057040214538574


training:  61%|████████████████████████████████████████████████████████████████████████▋                                               | 549/907 [11:20<04:21,  1.37it/s]

training loss: 2.8693361282348633
training loss: 2.9577105045318604
training loss: 2.879214286804199
training loss: 2.865206241607666
training loss: 2.863393545150757
validation loss: 2.9176387786865234
training loss: 2.8713696002960205
training loss: 2.8696658611297607
training loss: 2.913170576095581
training loss: 2.9321351051330566
training loss: 2.8582634925842285
training loss: 2.9285600185394287
training loss: 3.1466469764709473
training loss: 2.8896377086639404
training loss: 2.9195220470428467
training loss: 2.8830840587615967


training:  62%|██████████████████████████████████████████████████████████████████████████▌                                             | 564/907 [11:30<04:08,  1.38it/s]

training loss: 2.9212746620178223
training loss: 2.8864786624908447
validation loss: 2.8935015201568604
training loss: 2.9385905265808105
training loss: 2.9337754249572754
training loss: 2.8993797302246094
training loss: 2.9977712631225586
training loss: 2.9146265983581543
training loss: 2.852786064147949
training loss: 2.876972198486328
training loss: 2.8983776569366455
training loss: 2.8874688148498535
training loss: 2.8913023471832275
training loss: 2.893552541732788
training loss: 2.89231538772583
validation loss: 2.891759157180786
training loss: 2.8640079498291016


training:  64%|████████████████████████████████████████████████████████████████████████████▌                                           | 579/907 [11:41<03:58,  1.38it/s]

training loss: 2.8876302242279053
training loss: 2.9042131900787354
training loss: 2.8641068935394287
training loss: 2.8576552867889404
training loss: 2.8925929069519043
training loss: 2.8644649982452393
training loss: 2.9287521839141846
training loss: 2.8966572284698486
training loss: 2.900891065597534
training loss: 2.8750672340393066
training loss: 2.8925347328186035
validation loss: 2.8980326652526855
training loss: 2.8798410892486572
training loss: 2.909607172012329
training loss: 2.885991096496582
training loss: 2.88169002532959


training:  65%|██████████████████████████████████████████████████████████████████████████████▌                                         | 594/907 [11:52<03:46,  1.38it/s]

training loss: 2.888768196105957
training loss: 2.8732175827026367
training loss: 2.921071767807007
training loss: 2.942915439605713
training loss: 2.9105496406555176
training loss: 2.854893207550049
training loss: 2.8843541145324707
training loss: 2.9422547817230225
validation loss: 2.9182963371276855
training loss: 2.8996241092681885
training loss: 2.8816347122192383
training loss: 2.9003841876983643
training loss: 2.870976448059082
training loss: 2.864434003829956
training loss: 2.924555540084839
training loss: 2.8454153537750244


training:  67%|████████████████████████████████████████████████████████████████████████████████▌                                       | 609/907 [12:03<03:34,  1.39it/s]

training loss: 2.916780471801758
training loss: 2.8766136169433594
training loss: 2.886035919189453
training loss: 2.8928325176239014
training loss: 2.836575746536255
validation loss: 2.8582544326782227
training loss: 2.8890533447265625
training loss: 2.9109253883361816
training loss: 2.9151601791381836
training loss: 2.881535530090332
training loss: 2.881798505783081
training loss: 2.828228712081909
training loss: 2.901796340942383
training loss: 2.8531110286712646
training loss: 2.8905701637268066
training loss: 2.8937251567840576


training:  69%|██████████████████████████████████████████████████████████████████████████████████▌                                     | 624/907 [12:13<03:22,  1.40it/s]

training loss: 2.8907010555267334
training loss: 2.8589670658111572
validation loss: 2.869309425354004
training loss: 2.8323776721954346
training loss: 2.867116689682007
training loss: 2.878305435180664
training loss: 2.9166345596313477
training loss: 2.8939971923828125
training loss: 2.8495686054229736
training loss: 2.883579969406128
training loss: 2.873417377471924
training loss: 2.879549503326416
training loss: 2.8207879066467285
training loss: 2.831923723220825
training loss: 2.93666934967041
validation loss: 2.8770456314086914
training loss: 2.88108491897583


training:  70%|████████████████████████████████████████████████████████████████████████████████████▌                                   | 639/907 [12:24<03:12,  1.40it/s]

training loss: 2.836864471435547
training loss: 2.83731746673584
training loss: 2.8561851978302
training loss: 2.832886219024658
training loss: 2.937492847442627
training loss: 2.859086275100708
training loss: 2.869293212890625
training loss: 2.8746376037597656
training loss: 2.900045871734619
training loss: 2.8701415061950684
training loss: 2.87302827835083
validation loss: 2.883195400238037
training loss: 2.8792073726654053
training loss: 2.883363962173462
training loss: 2.908433437347412
training loss: 2.8894357681274414


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▌                                 | 654/907 [12:35<03:00,  1.40it/s]

training loss: 2.8811028003692627
training loss: 2.888943672180176
training loss: 2.853210926055908
training loss: 2.870323896408081
training loss: 2.859198570251465
training loss: 2.853914260864258
training loss: 2.8850955963134766
training loss: 2.880837917327881
validation loss: 2.9128787517547607
training loss: 2.9060909748077393
training loss: 2.894008159637451
training loss: 2.933595657348633
training loss: 2.915590763092041
training loss: 2.906956195831299
training loss: 2.8947207927703857
training loss: 2.891639471054077


training:  74%|████████████████████████████████████████████████████████████████████████████████████████▌                               | 669/907 [12:45<02:49,  1.40it/s]

training loss: 2.9127659797668457
training loss: 2.8884286880493164
training loss: 2.9335625171661377
training loss: 2.844484329223633
training loss: 2.8541107177734375
validation loss: 2.8870160579681396
training loss: 2.937467098236084
training loss: 2.91565203666687
training loss: 2.855210065841675
training loss: 2.910083055496216
training loss: 2.8441615104675293
training loss: 2.8524210453033447
training loss: 2.899858236312866
training loss: 2.871140718460083
training loss: 2.927110195159912
training loss: 2.8866312503814697


training:  75%|██████████████████████████████████████████████████████████████████████████████████████████▍                             | 684/907 [12:56<02:38,  1.40it/s]

training loss: 2.8418500423431396
training loss: 2.896214246749878
validation loss: 2.9073522090911865
training loss: 2.9094817638397217
training loss: 2.894040822982788
training loss: 2.8799211978912354
training loss: 2.9081830978393555
training loss: 2.8767778873443604
training loss: 2.92503023147583
training loss: 2.913471221923828
training loss: 2.8920326232910156
training loss: 2.890784978866577
training loss: 2.8686211109161377
training loss: 2.888219118118286
training loss: 2.9000964164733887
validation loss: 2.8605570793151855
training loss: 2.8938064575195312


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▍                           | 699/907 [13:07<02:28,  1.40it/s]

training loss: 2.9168953895568848
training loss: 2.914116621017456
training loss: 2.899721622467041
training loss: 2.8555428981781006
training loss: 2.964797258377075
training loss: 2.942246437072754
training loss: 2.886594295501709
training loss: 2.9269583225250244
training loss: 2.8751935958862305
training loss: 2.9108617305755615
training loss: 2.8965132236480713
validation loss: 2.8549160957336426
training loss: 2.903473138809204
training loss: 2.8765132427215576
training loss: 2.8983006477355957
training loss: 2.888110637664795


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▍                         | 714/907 [13:17<02:17,  1.41it/s]

training loss: 2.8638880252838135
training loss: 2.875110149383545
training loss: 2.838299512863159
training loss: 2.875662088394165
training loss: 2.9337594509124756
training loss: 2.8615100383758545
training loss: 2.8316702842712402
training loss: 2.8685734272003174
validation loss: 2.893104076385498
training loss: 2.9524424076080322
training loss: 2.8383848667144775
training loss: 2.8478152751922607
training loss: 2.8877339363098145
training loss: 2.857714891433716
training loss: 2.925595760345459
training loss: 2.901214599609375


training:  80%|████████████████████████████████████████████████████████████████████████████████████████████████▍                       | 729/907 [13:28<02:06,  1.41it/s]

training loss: 2.861116886138916
training loss: 2.8819539546966553
training loss: 2.9043400287628174
training loss: 2.8792648315429688
training loss: 2.928609609603882
validation loss: 2.8817551136016846
training loss: 2.8639307022094727
training loss: 2.898017644882202
training loss: 2.8764636516571045
training loss: 2.904262065887451
training loss: 2.8672032356262207
training loss: 2.8623416423797607
training loss: 2.908273458480835
training loss: 2.8398637771606445
training loss: 2.906890630722046
training loss: 2.8726277351379395


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▍                     | 744/907 [13:39<01:55,  1.41it/s]

training loss: 2.834873914718628
training loss: 2.886378526687622
validation loss: 2.8646745681762695
training loss: 2.8732964992523193
training loss: 2.878276824951172
training loss: 2.8985443115234375
training loss: 2.9168028831481934
training loss: 2.9055566787719727
training loss: 2.871666669845581
training loss: 2.88326358795166
training loss: 2.8514819145202637
training loss: 2.8740394115448
training loss: 2.8824453353881836
training loss: 2.8699352741241455
training loss: 2.839672565460205
validation loss: 2.8737847805023193
training loss: 2.8881638050079346


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▍                   | 759/907 [13:49<01:45,  1.41it/s]

training loss: 2.8975634574890137
training loss: 2.8611087799072266
training loss: 2.9361400604248047
training loss: 2.900933265686035
training loss: 2.8629868030548096
training loss: 2.820364236831665
training loss: 2.8565757274627686
training loss: 2.9262545108795166
training loss: 2.869250774383545
training loss: 2.8517045974731445
training loss: 2.871760129928589
validation loss: 2.802490234375
training loss: 2.882701873779297
training loss: 2.8650448322296143
training loss: 2.844412088394165
training loss: 2.8954274654388428


training:  85%|██████████████████████████████████████████████████████████████████████████████████████████████████████▍                 | 774/907 [14:00<01:34,  1.41it/s]

training loss: 2.8760883808135986
training loss: 2.8562517166137695
training loss: 2.9053955078125
training loss: 2.9116992950439453
training loss: 2.850623846054077
training loss: 2.8752615451812744
training loss: 2.8902761936187744
training loss: 2.8744778633117676
validation loss: 2.891167163848877
training loss: 2.890537738800049
training loss: 2.931767702102661
training loss: 2.903756618499756
training loss: 2.9086170196533203
training loss: 2.8396832942962646
training loss: 2.870917558670044
training loss: 2.8446013927459717


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▍               | 789/907 [14:10<01:23,  1.41it/s]

training loss: 2.8810133934020996
training loss: 2.9299795627593994
training loss: 2.8677778244018555
training loss: 2.899439573287964
training loss: 2.912801742553711
validation loss: 2.890843629837036
training loss: 2.871156930923462
training loss: 2.88916277885437
training loss: 2.895181179046631
training loss: 2.8862991333007812
training loss: 2.891903877258301
training loss: 2.9029178619384766
training loss: 2.9008560180664062
training loss: 2.9084060192108154
training loss: 2.8644020557403564
training loss: 2.870154857635498


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▎             | 804/907 [14:21<01:13,  1.41it/s]

training loss: 2.8757219314575195
training loss: 2.872959852218628
validation loss: 2.887698173522949
training loss: 2.8982462882995605
training loss: 2.8479418754577637
training loss: 2.9135546684265137
training loss: 2.8513031005859375
training loss: 2.886110305786133
training loss: 2.886991500854492
training loss: 2.818775177001953
training loss: 2.8918778896331787
training loss: 2.861015796661377
training loss: 2.9159507751464844
training loss: 2.8579890727996826
training loss: 2.992506504058838
validation loss: 2.8619351387023926
training loss: 2.8799338340759277


training:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▎           | 819/907 [14:32<01:02,  1.41it/s]

training loss: 2.8265819549560547
training loss: 2.898001194000244
training loss: 2.9069855213165283
training loss: 2.9128198623657227
training loss: 2.856090545654297
training loss: 2.915217161178589
training loss: 2.8770503997802734
training loss: 2.9292266368865967
training loss: 2.881350040435791
training loss: 2.872833251953125
training loss: 2.879178524017334
validation loss: 2.8868558406829834
training loss: 2.9404242038726807
training loss: 2.8695390224456787
training loss: 2.9224343299865723
training loss: 2.900761127471924


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▎         | 834/907 [14:42<00:51,  1.41it/s]

training loss: 2.8393211364746094
training loss: 2.9167823791503906
training loss: 2.8924381732940674
training loss: 2.8971898555755615
training loss: 2.8445630073547363
training loss: 2.894977331161499
training loss: 2.8974008560180664
training loss: 2.9172136783599854
validation loss: 2.8629348278045654
training loss: 2.920628786087036
training loss: 2.8759562969207764
training loss: 2.8382182121276855
training loss: 2.8404698371887207
training loss: 2.8783490657806396
training loss: 2.9019439220428467
training loss: 2.860947370529175


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎       | 849/907 [14:53<00:41,  1.41it/s]

training loss: 2.818142890930176
training loss: 2.8908379077911377
training loss: 2.883543014526367
training loss: 2.8700835704803467
training loss: 2.8753879070281982
validation loss: 2.8568930625915527
training loss: 2.938467264175415
training loss: 2.8866982460021973
training loss: 2.936993360519409
training loss: 2.8855223655700684
training loss: 2.883038282394409
training loss: 2.9112637042999268
training loss: 2.9327807426452637
training loss: 2.908050775527954
training loss: 2.896838665008545
training loss: 2.8939783573150635


training:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎     | 864/907 [15:04<00:30,  1.41it/s]

training loss: 2.9739983081817627
training loss: 2.905581474304199
validation loss: 2.88295841217041
training loss: 2.8889577388763428
training loss: 2.885756254196167
training loss: 2.8681936264038086
training loss: 2.852872133255005
training loss: 2.904147148132324
training loss: 2.8798904418945312
training loss: 2.9453840255737305
training loss: 2.9117324352264404
training loss: 2.884556531906128
training loss: 2.878504753112793
training loss: 2.8739748001098633
training loss: 2.895936965942383
validation loss: 2.89444637298584
training loss: 2.8726625442504883


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎   | 879/907 [15:14<00:19,  1.41it/s]

training loss: 2.895209312438965
training loss: 2.8653507232666016
training loss: 2.9046730995178223
training loss: 2.886585235595703
training loss: 2.8878817558288574
training loss: 2.890854597091675
training loss: 2.9017415046691895
training loss: 2.8659698963165283
training loss: 2.8837058544158936
training loss: 2.9307913780212402
training loss: 2.876110553741455
validation loss: 2.8617074489593506
training loss: 2.896880626678467
training loss: 2.9002487659454346
training loss: 2.861504554748535
training loss: 2.880265235900879


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 894/907 [15:25<00:09,  1.41it/s]

training loss: 2.9145431518554688
training loss: 2.8933019638061523
training loss: 2.911574602127075
training loss: 2.893397092819214
training loss: 2.8975887298583984
training loss: 2.8530526161193848
training loss: 2.880702018737793
training loss: 2.9113030433654785
validation loss: 2.901155948638916
training loss: 2.846673011779785
training loss: 2.8551933765411377
training loss: 2.905571937561035
training loss: 2.864086627960205
training loss: 2.882725715637207


training: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 907/907 [15:34<00:00,  1.03s/it]
training:   0%|                                                                                                                                  | 0/907 [00:00<?, ?it/s]

training loss: 2.855170488357544
training loss: 2.957958698272705
validation loss: 2.8673017024993896
training loss: 2.823662519454956
training loss: 2.931084156036377
training loss: 2.8729188442230225
training loss: 2.885213613510132
training loss: 2.8817806243896484
training loss: 2.90535569190979
training loss: 2.856593608856201
training loss: 2.8950953483581543
training loss: 2.8819921016693115
training loss: 2.9105567932128906
training loss: 2.8889262676239014
training loss: 2.884373188018799
validation loss: 2.880648612976074
training loss: 2.8684873580932617


training:   2%|██                                                                                                                       | 15/907 [00:10<10:34,  1.41it/s]

training loss: 2.8507046699523926
training loss: 2.880514621734619
training loss: 2.8982253074645996
training loss: 2.8499462604522705
training loss: 2.875716209411621
training loss: 2.825373411178589
training loss: 2.8727927207946777
training loss: 2.920039176940918
training loss: 2.877359390258789
training loss: 2.925063133239746
training loss: 2.8796334266662598
validation loss: 2.8754138946533203
training loss: 2.8748250007629395
training loss: 2.933079719543457
training loss: 2.895555257797241
training loss: 2.8690009117126465


training:   3%|████                                                                                                                     | 30/907 [00:21<10:23,  1.41it/s]

training loss: 2.948244571685791
training loss: 2.9354116916656494
training loss: 2.907205104827881
training loss: 2.8710665702819824
training loss: 2.8658370971679688
training loss: 2.880605459213257
training loss: 2.865295171737671
training loss: 2.9336838722229004
validation loss: 2.9143242835998535
training loss: 2.9191644191741943
training loss: 2.9035613536834717
training loss: 2.88370418548584
training loss: 2.8927254676818848
training loss: 2.8524491786956787
training loss: 2.8822433948516846
training loss: 2.9113078117370605


training:   5%|██████                                                                                                                   | 45/907 [00:31<10:11,  1.41it/s]

training loss: 2.848525047302246
training loss: 2.88749623298645
training loss: 2.8922510147094727
training loss: 2.8848516941070557
training loss: 2.925764322280884
validation loss: 2.8646421432495117
training loss: 2.9236905574798584
training loss: 2.9809350967407227
training loss: 2.9103760719299316
training loss: 2.882185935974121
training loss: 2.856928586959839
training loss: 2.8334712982177734
training loss: 2.872321844100952
training loss: 2.8924670219421387
training loss: 2.9019641876220703
training loss: 2.8179452419281006


training:   7%|████████                                                                                                                 | 60/907 [00:42<10:01,  1.41it/s]

training loss: 2.872894048690796
training loss: 2.868622303009033
validation loss: 2.8840506076812744
training loss: 2.8661739826202393
training loss: 2.945261240005493
training loss: 2.8817272186279297
training loss: 2.8593952655792236
training loss: 2.969250440597534
training loss: 2.9217631816864014
training loss: 2.8623390197753906
training loss: 2.9022464752197266
training loss: 2.8695762157440186
training loss: 2.919517755508423
training loss: 2.923438310623169
training loss: 2.857226848602295
validation loss: 2.864945411682129
training loss: 2.831789255142212


training:   8%|██████████                                                                                                               | 75/907 [00:53<09:55,  1.40it/s]

training loss: 2.8595380783081055
training loss: 2.9225783348083496
training loss: 2.894456386566162
training loss: 2.8961679935455322
training loss: 2.875807762145996
training loss: 2.9234812259674072
training loss: 2.865257978439331
training loss: 2.8718812465667725
training loss: 2.857276678085327
training loss: 2.855548620223999
training loss: 2.892699956893921
validation loss: 2.9179842472076416
training loss: 2.890437126159668
training loss: 2.879242181777954
training loss: 2.8952689170837402
training loss: 2.8753714561462402


training:  10%|████████████                                                                                                             | 90/907 [01:04<09:44,  1.40it/s]

training loss: 2.906425714492798
training loss: 2.906170129776001
training loss: 2.9046740531921387
training loss: 2.8460805416107178
training loss: 2.8929429054260254
training loss: 2.903944253921509
training loss: 2.915555953979492
training loss: 2.898261070251465
validation loss: 2.9003920555114746
training loss: 2.895843744277954
training loss: 2.854135751724243
training loss: 2.833916664123535
training loss: 2.8882174491882324
training loss: 2.867117404937744
training loss: 2.865558385848999
training loss: 2.9065332412719727


training:  12%|█████████████▉                                                                                                          | 105/907 [01:15<09:34,  1.40it/s]

training loss: 2.930492639541626
training loss: 2.9301767349243164
training loss: 2.8275818824768066
training loss: 2.8825128078460693
training loss: 2.8540420532226562
validation loss: 2.900498390197754
training loss: 2.8823585510253906
training loss: 2.8546853065490723
training loss: 2.9127891063690186
training loss: 2.896109104156494
training loss: 2.929830551147461
training loss: 2.8663735389709473
training loss: 2.893632173538208
training loss: 2.8420045375823975
training loss: 2.860527753829956


training:  13%|███████████████▋                                                                                                        | 119/907 [01:25<09:24,  1.40it/s]

training loss: 2.9135165214538574
training loss: 2.8978145122528076
training loss: 2.885340690612793
validation loss: 2.9081408977508545
training loss: 2.869616746902466
training loss: 2.9039502143859863
training loss: 2.903247356414795
training loss: 2.8732714653015137
training loss: 2.9525222778320312
training loss: 2.894881248474121
training loss: 2.861809015274048
training loss: 2.865659713745117
training loss: 2.8756675720214844
training loss: 2.853663682937622
training loss: 2.9186484813690186


training:  15%|█████████████████▌                                                                                                      | 133/907 [01:35<09:14,  1.40it/s]

training loss: 2.870058536529541
validation loss: 2.835200309753418
training loss: 2.8611886501312256
training loss: 2.8943355083465576
training loss: 2.9369888305664062
training loss: 2.8930795192718506
training loss: 2.8500232696533203
training loss: 2.8990795612335205
training loss: 2.8866920471191406
training loss: 2.8959245681762695
training loss: 2.8911399841308594
training loss: 2.871583938598633
training loss: 2.8800899982452393
training loss: 2.8780415058135986
validation loss: 2.891218662261963
training loss: 2.8550076484680176


training:  16%|███████████████████▍                                                                                                    | 147/907 [01:45<09:10,  1.38it/s]

training loss: 2.9153568744659424
training loss: 2.9028823375701904
training loss: 2.892545700073242
training loss: 2.8843016624450684
training loss: 2.9327471256256104
training loss: 2.9158692359924316
training loss: 2.842275619506836
training loss: 2.9003453254699707
training loss: 2.85878586769104
training loss: 2.886470317840576
training loss: 2.849426507949829
validation loss: 2.870462417602539
training loss: 2.843444585800171
training loss: 2.8893656730651855
training loss: 2.9051225185394287


training:  18%|█████████████████████▎                                                                                                  | 161/907 [01:55<09:01,  1.38it/s]

training loss: 2.8591654300689697
training loss: 2.9734292030334473
training loss: 2.882277250289917
training loss: 2.918273687362671
training loss: 2.8734610080718994
training loss: 2.898263692855835
training loss: 2.8875436782836914
training loss: 2.8414828777313232
training loss: 2.8853912353515625
validation loss: 2.8675343990325928
training loss: 2.916684627532959
training loss: 2.87064266204834
training loss: 2.8652663230895996
training loss: 2.909083127975464
training loss: 2.9043502807617188


training:  19%|███████████████████████▏                                                                                                | 175/907 [02:05<08:52,  1.37it/s]

training loss: 2.8956804275512695
training loss: 2.8695623874664307
training loss: 2.8684229850769043
training loss: 2.8431031703948975
training loss: 2.896960496902466
training loss: 2.902419328689575
training loss: 2.876845598220825
validation loss: 2.8666231632232666
training loss: 2.896653652191162
training loss: 2.863746404647827
training loss: 2.8743436336517334
training loss: 2.8870909214019775
training loss: 2.9319725036621094
training loss: 2.9023241996765137
training loss: 2.9063472747802734


training:  21%|█████████████████████████                                                                                               | 189/907 [02:16<08:43,  1.37it/s]

training loss: 2.846226453781128
training loss: 2.90429949760437
training loss: 2.8985719680786133
training loss: 2.883236885070801
training loss: 2.8941750526428223
validation loss: 2.8816418647766113
training loss: 2.878981590270996
training loss: 2.880254030227661
training loss: 2.9150280952453613
training loss: 2.857196807861328
training loss: 2.828219413757324
training loss: 2.8443145751953125
training loss: 2.8649189472198486
training loss: 2.8549771308898926
training loss: 2.8486666679382324


training:  22%|██████████████████████████▊                                                                                             | 203/907 [02:26<08:33,  1.37it/s]

training loss: 2.929769515991211
training loss: 2.9666876792907715
training loss: 2.853501796722412
validation loss: 2.8916592597961426
training loss: 2.8428075313568115
training loss: 2.8573708534240723
training loss: 2.876987934112549
training loss: 2.849151849746704
training loss: 2.8881373405456543
training loss: 2.8539299964904785
training loss: 2.8501338958740234
training loss: 2.8633439540863037
training loss: 2.8899600505828857
training loss: 2.894758462905884
training loss: 2.8314337730407715


training:  24%|████████████████████████████▋                                                                                           | 217/907 [02:37<08:41,  1.32it/s]

training loss: 2.908726453781128
validation loss: 2.859480142593384
training loss: 2.872011661529541
training loss: 2.9037868976593018
training loss: 2.91355037689209
training loss: 2.8956174850463867
training loss: 2.8686602115631104
training loss: 2.8950541019439697
training loss: 2.8772220611572266
training loss: 2.9219777584075928
training loss: 2.928565740585327
training loss: 2.870647668838501
training loss: 2.8780157566070557
training loss: 2.8754289150238037
validation loss: 2.8818318843841553
training loss: 2.895613431930542


training:  25%|██████████████████████████████▌                                                                                         | 231/907 [02:48<08:38,  1.30it/s]

training loss: 2.919020414352417
training loss: 2.8954451084136963
training loss: 2.8929555416107178
training loss: 2.920663833618164
training loss: 2.9235684871673584
training loss: 2.8741438388824463
training loss: 2.8725950717926025
training loss: 2.9189059734344482
training loss: 2.9347803592681885
training loss: 2.8599507808685303


training:  27%|███████████████████████████████▉                                                                                        | 241/907 [03:00<09:48,  1.13it/s]

training loss: 2.929882287979126
validation loss: 2.861250400543213
training loss: 2.8878870010375977
training loss: 2.905395984649658
training loss: 2.9009528160095215
training loss: 3.01318359375
training loss: 2.8602795600891113
training loss: 2.8983938694000244
training loss: 2.929614543914795
training loss: 2.9377191066741943


training:  28%|█████████████████████████████████                                                                                       | 250/907 [03:12<11:12,  1.02s/it]

training loss: 2.901975631713867
training loss: 2.9993767738342285
training loss: 2.917544364929199
training loss: 2.8500072956085205
validation loss: 2.8806533813476562
training loss: 2.9593942165374756
training loss: 2.875856399536133
training loss: 2.8906733989715576
training loss: 2.8998982906341553


training:  28%|██████████████████████████████████▏                                                                                     | 258/907 [03:29<14:27,  1.34s/it]

training loss: 2.9211266040802
training loss: 2.8771986961364746
training loss: 2.8679256439208984
training loss: 2.8905632495880127
training loss: 2.9041285514831543


training:  29%|██████████████████████████████████▊                                                                                     | 263/907 [03:41<17:57,  1.67s/it]

training loss: 2.8672099113464355
training loss: 2.8706297874450684
training loss: 2.8848021030426025
validation loss: 2.8957457542419434
training loss: 2.858677864074707
training loss: 2.870614528656006


training:  30%|███████████████████████████████████▍                                                                                    | 268/907 [04:01<25:30,  2.39s/it]

training loss: 2.8893094062805176
training loss: 2.8890247344970703
training loss: 2.9120283126831055


training:  30%|███████████████████████████████████▊                                                                                    | 271/907 [04:16<32:48,  3.09s/it]

training loss: 2.901014566421509
training loss: 2.876305103302002
training loss: 2.9108047485351562


training:  30%|████████████████████████████████████▎                                                                                   | 274/907 [04:30<37:42,  3.57s/it]

training loss: 2.8808059692382812
training loss: 2.8811330795288086
training loss: 2.8908181190490723
training loss: 2.8751068115234375


training:  31%|████████████████████████████████████▋                                                                                   | 277/907 [04:45<42:11,  4.02s/it]

validation loss: 2.845031499862671
training loss: 2.920863628387451
training loss: 2.8741567134857178


training:  31%|█████████████████████████████████████                                                                                   | 280/907 [04:57<42:07,  4.03s/it]

training loss: 2.8996853828430176
training loss: 2.889103889465332
training loss: 2.8693292140960693


training:  31%|█████████████████████████████████████▍                                                                                  | 283/907 [05:11<43:33,  4.19s/it]

training loss: 2.9135568141937256
training loss: 2.9138689041137695
training loss: 2.8820064067840576


training:  32%|█████████████████████████████████████▊                                                                                  | 286/907 [05:23<43:00,  4.15s/it]

training loss: 2.8969790935516357
training loss: 2.8826937675476074
training loss: 2.8600025177001953
training loss: 2.934926748275757


training:  32%|██████████████████████████████████████▏                                                                                 | 289/907 [05:37<44:01,  4.27s/it]

validation loss: 2.871615171432495
training loss: 2.881040096282959
training loss: 2.919586420059204


training:  32%|██████████████████████████████████████▋                                                                                 | 292/907 [05:50<44:32,  4.35s/it]

training loss: 2.867234945297241
training loss: 2.8602206707000732
training loss: 2.8613646030426025


training:  33%|███████████████████████████████████████                                                                                 | 295/907 [06:04<45:33,  4.47s/it]

training loss: 2.8492469787597656
training loss: 2.8881890773773193
training loss: 2.9144744873046875


training:  33%|███████████████████████████████████████▍                                                                                | 298/907 [06:18<45:45,  4.51s/it]

training loss: 2.9110519886016846
training loss: 2.88038969039917
training loss: 2.867370843887329


training:  33%|███████████████████████████████████████▍                                                                                | 298/907 [06:30<45:45,  4.51s/it]

training loss: 2.838970422744751


training:  33%|███████████████████████████████████████▊                                                                                | 301/907 [06:34<47:45,  4.73s/it]

validation loss: 2.8799593448638916
training loss: 2.8717072010040283
training loss: 2.8878331184387207


training:  34%|████████████████████████████████████████▏                                                                               | 304/907 [06:48<47:17,  4.71s/it]

training loss: 2.8699042797088623
training loss: 2.942474126815796
training loss: 2.889086961746216


training:  34%|████████████████████████████████████████▌                                                                               | 307/907 [07:01<46:09,  4.62s/it]

training loss: 2.8711698055267334
training loss: 2.8278326988220215
training loss: 2.872908353805542


training:  34%|█████████████████████████████████████████                                                                               | 310/907 [07:13<44:22,  4.46s/it]

training loss: 2.882740020751953
training loss: 2.917532205581665
training loss: 2.891894817352295
training loss: 2.847775936126709


training:  35%|█████████████████████████████████████████▍                                                                              | 313/907 [07:27<44:38,  4.51s/it]

validation loss: 2.862905502319336
training loss: 2.8724024295806885


training:  35%|█████████████████████████████████████████▋                                                                              | 315/907 [07:40<49:37,  5.03s/it]

training loss: 2.8818752765655518
training loss: 2.865006923675537
training loss: 2.8600189685821533


training:  35%|██████████████████████████████████████████                                                                              | 318/907 [07:55<49:08,  5.01s/it]

training loss: 2.8818857669830322
training loss: 2.8748693466186523
training loss: 2.877326726913452


training:  35%|██████████████████████████████████████████▍                                                                             | 321/907 [08:14<53:23,  5.47s/it]

training loss: 2.841130256652832
training loss: 2.836987257003784


training:  36%|██████████████████████████████████████████▋                                                                             | 323/907 [08:27<55:42,  5.72s/it]

training loss: 2.854074001312256
training loss: 2.8170127868652344
training loss: 2.9289627075195312


training:  36%|██████████████████████████████████████████▉                                                                             | 325/907 [08:37<53:28,  5.51s/it]

validation loss: 2.891270399093628
training loss: 2.8795535564422607
training loss: 2.883737802505493
training loss: 2.904536008834839
training loss: 2.848823070526123
training loss: 2.898061990737915


training:  36%|███████████████████████████████████████████▊                                                                            | 331/907 [08:47<42:07,  4.39s/it]

training loss: 2.8748879432678223
training loss: 2.8668053150177
training loss: 2.879234790802002
training loss: 2.917746067047119
training loss: 2.8974008560180664


training:  37%|████████████████████████████████████████████▍                                                                           | 336/907 [09:04<38:31,  4.05s/it]

training loss: 2.878777265548706
training loss: 2.882274866104126
validation loss: 2.8686017990112305
training loss: 2.849438190460205
training loss: 2.8935725688934326


training:  37%|████████████████████████████████████████████▉                                                                           | 340/907 [09:23<40:35,  4.30s/it]

training loss: 2.895965099334717
training loss: 2.8985235691070557
training loss: 2.9053399562835693


training:  38%|█████████████████████████████████████████████▍                                                                          | 343/907 [09:39<42:42,  4.54s/it]

training loss: 2.9055967330932617
training loss: 2.9094364643096924


training:  38%|█████████████████████████████████████████████▋                                                                          | 345/907 [09:49<44:06,  4.71s/it]

training loss: 2.877692461013794
training loss: 2.9143829345703125


training:  38%|█████████████████████████████████████████████▉                                                                          | 347/907 [09:59<45:19,  4.86s/it]

training loss: 2.9203317165374756
training loss: 2.886532783508301


training:  38%|█████████████████████████████████████████████▉                                                                          | 347/907 [10:10<45:19,  4.86s/it]

training loss: 2.8917236328125


training:  38%|██████████████████████████████████████████████▏                                                                         | 349/907 [10:10<47:16,  5.08s/it]

validation loss: 2.8928356170654297
training loss: 2.901789426803589


training:  39%|██████████████████████████████████████████████▍                                                                         | 351/907 [10:21<47:48,  5.16s/it]

training loss: 2.8953707218170166
training loss: 2.8530850410461426


training:  39%|██████████████████████████████████████████████▋                                                                         | 353/907 [10:32<48:56,  5.30s/it]

training loss: 2.8941006660461426
training loss: 2.9077823162078857


training:  39%|██████████████████████████████████████████████▉                                                                         | 355/907 [10:44<49:54,  5.42s/it]

training loss: 2.8886663913726807
training loss: 2.9416208267211914


training:  39%|███████████████████████████████████████████████▏                                                                        | 357/907 [10:55<50:28,  5.51s/it]

training loss: 2.9031267166137695
training loss: 2.8549160957336426


training:  40%|███████████████████████████████████████████████▍                                                                        | 359/907 [11:07<52:05,  5.70s/it]

training loss: 2.9082531929016113
training loss: 2.8460071086883545


training:  40%|███████████████████████████████████████████████▍                                                                        | 359/907 [11:20<52:05,  5.70s/it]

training loss: 2.8546628952026367


training:  40%|███████████████████████████████████████████████▊                                                                        | 361/907 [11:21<54:24,  5.98s/it]

validation loss: 2.8755507469177246
training loss: 2.8774847984313965


training:  40%|████████████████████████████████████████████████                                                                        | 363/907 [11:32<53:41,  5.92s/it]

training loss: 2.8797245025634766
training loss: 2.8472416400909424


training:  40%|████████████████████████████████████████████████▎                                                                       | 365/907 [11:44<53:46,  5.95s/it]

training loss: 2.897731065750122
training loss: 2.8904478549957275


training:  40%|████████████████████████████████████████████████▌                                                                       | 367/907 [11:57<54:23,  6.04s/it]

training loss: 2.8920881748199463
training loss: 2.900163412094116


training:  41%|████████████████████████████████████████████████▊                                                                       | 369/907 [12:10<55:42,  6.21s/it]

training loss: 2.89315128326416
training loss: 2.876866102218628


training:  41%|█████████████████████████████████████████████████                                                                       | 371/907 [12:22<54:57,  6.15s/it]

training loss: 2.8995954990386963
training loss: 2.87369966506958
training loss: 2.915820360183716
validation loss: 2.905876398086548


training:  41%|█████████████████████████████████████████████████▍                                                                      | 374/907 [12:33<48:12,  5.43s/it]

training loss: 2.839871883392334
training loss: 2.9259097576141357
training loss: 2.903989553451538
training loss: 2.8581430912017822
training loss: 2.86747145652771
training loss: 2.8513455390930176
training loss: 2.8865668773651123
training loss: 2.916898727416992


training:  42%|██████████████████████████████████████████████████▌                                                                     | 382/907 [12:43<36:34,  4.18s/it]

training loss: 2.9096622467041016
training loss: 2.888659954071045
training loss: 2.8499090671539307
training loss: 2.970132350921631
validation loss: 2.830413341522217
training loss: 2.9297070503234863
training loss: 2.8399691581726074
training loss: 2.894108295440674


training:  43%|███████████████████████████████████████████████████▍                                                                    | 389/907 [13:00<31:26,  3.64s/it]

training loss: 2.9046852588653564
training loss: 2.8939266204833984
training loss: 2.850545644760132
training loss: 2.8962950706481934
training loss: 2.8793442249298096


training:  43%|████████████████████████████████████████████████████▏                                                                   | 394/907 [13:21<32:40,  3.82s/it]

training loss: 2.8932383060455322
training loss: 2.8857243061065674
training loss: 2.8618102073669434
training loss: 2.8711204528808594


training:  44%|████████████████████████████████████████████████████▌                                                                   | 397/907 [13:35<34:15,  4.03s/it]

validation loss: 2.865022659301758
training loss: 2.840653896331787
training loss: 2.931924819946289
training loss: 2.8578102588653564


training:  44%|█████████████████████████████████████████████████████                                                                   | 401/907 [13:47<31:32,  3.74s/it]

training loss: 2.8777832984924316
training loss: 2.916459798812866
training loss: 2.8852765560150146
training loss: 2.9468295574188232


training:  45%|█████████████████████████████████████████████████████▌                                                                  | 405/907 [14:03<31:56,  3.82s/it]

training loss: 2.839529514312744
training loss: 2.8464114665985107
training loss: 2.8751442432403564


training:  45%|█████████████████████████████████████████████████████▉                                                                  | 408/907 [14:16<33:02,  3.97s/it]

training loss: 2.8467283248901367
training loss: 2.914041042327881
validation loss: 2.890493631362915
training loss: 2.859773874282837


training:  45%|██████████████████████████████████████████████████████▍                                                                 | 411/907 [14:29<33:43,  4.08s/it]

training loss: 2.9205410480499268
training loss: 2.91880202293396
training loss: 2.8919358253479004


training:  46%|██████████████████████████████████████████████████████▊                                                                 | 414/907 [14:43<34:39,  4.22s/it]

training loss: 2.8453667163848877
training loss: 2.8821399211883545
training loss: 2.851292610168457


training:  46%|███████████████████████████████████████████████████████▏                                                                | 417/907 [14:58<36:16,  4.44s/it]

training loss: 2.8882081508636475
training loss: 2.866947650909424
training loss: 2.894253969192505


training:  46%|███████████████████████████████████████████████████████▌                                                                | 420/907 [15:13<38:02,  4.69s/it]

training loss: 2.8569939136505127
training loss: 2.8425443172454834
validation loss: 2.8599438667297363


training:  47%|███████████████████████████████████████████████████████▊                                                                | 422/907 [15:25<40:55,  5.06s/it]

training loss: 2.8732337951660156
training loss: 2.8874034881591797


training:  47%|████████████████████████████████████████████████████████                                                                | 424/907 [15:37<42:13,  5.25s/it]

training loss: 2.883471727371216
training loss: 2.934460401535034
training loss: 2.9040002822875977


training:  47%|████████████████████████████████████████████████████████▍                                                               | 427/907 [15:49<39:27,  4.93s/it]

training loss: 2.8593451976776123
training loss: 2.871608257293701


training:  47%|████████████████████████████████████████████████████████▊                                                               | 429/907 [16:01<41:50,  5.25s/it]

training loss: 2.8682687282562256
training loss: 2.8970866203308105


training:  48%|█████████████████████████████████████████████████████████                                                               | 431/907 [16:15<45:20,  5.71s/it]

training loss: 2.908369302749634
training loss: 2.9058778285980225
training loss: 2.871192693710327
validation loss: 2.8877532482147217


training:  48%|█████████████████████████████████████████████████████████▍                                                              | 434/907 [16:27<41:02,  5.21s/it]

training loss: 2.8871190547943115
training loss: 2.8920624256134033
training loss: 2.9093363285064697


training:  48%|█████████████████████████████████████████████████████████▊                                                              | 437/907 [16:41<39:33,  5.05s/it]

training loss: 2.8680992126464844
training loss: 2.8707926273345947
training loss: 2.877519369125366


training:  49%|██████████████████████████████████████████████████████████▏                                                             | 440/907 [16:51<35:28,  4.56s/it]

training loss: 2.8931539058685303
training loss: 2.894622564315796
training loss: 2.8563108444213867
training loss: 2.941601037979126
training loss: 2.898416757583618
training loss: 2.8710291385650635
validation loss: 2.8941941261291504


training:  49%|███████████████████████████████████████████████████████████                                                             | 446/907 [17:02<28:47,  3.75s/it]

training loss: 2.887809991836548
training loss: 2.9120869636535645
training loss: 2.8747076988220215
training loss: 2.9191172122955322
training loss: 2.901085376739502
training loss: 2.8063313961029053
training loss: 2.8840551376342773
training loss: 2.862076759338379


training:  50%|████████████████████████████████████████████████████████████                                                            | 454/907 [17:13<22:42,  3.01s/it]

training loss: 2.8442230224609375
training loss: 2.8817622661590576
training loss: 2.8695931434631348
training loss: 2.8584423065185547
validation loss: 2.8829400539398193
training loss: 2.862881898880005
training loss: 2.9299707412719727
training loss: 2.8885610103607178
training loss: 2.882766008377075


training:  51%|█████████████████████████████████████████████████████████████                                                           | 462/907 [17:23<18:29,  2.49s/it]

training loss: 2.8781750202178955
training loss: 2.881192207336426
training loss: 2.8842263221740723
training loss: 2.9379141330718994
training loss: 2.902104616165161
training loss: 2.890932321548462
training loss: 2.839877128601074


training:  51%|█████████████████████████████████████████████████████████████                                                           | 462/907 [17:40<18:29,  2.49s/it]

training loss: 2.8700778484344482


training:  52%|██████████████████████████████████████████████████████████████                                                          | 469/907 [17:42<18:49,  2.58s/it]

validation loss: 2.890625238418579
training loss: 2.825916051864624
training loss: 2.8632688522338867
training loss: 2.850900650024414


training:  52%|██████████████████████████████████████████████████████████████▌                                                         | 473/907 [18:02<23:37,  3.27s/it]

training loss: 2.830132246017456
training loss: 2.869417667388916
training loss: 2.8860385417938232


training:  52%|██████████████████████████████████████████████████████████████▉                                                         | 476/907 [18:17<26:58,  3.76s/it]

training loss: 2.8730335235595703
training loss: 2.889604330062866
training loss: 2.8919005393981934


training:  53%|███████████████████████████████████████████████████████████████▎                                                        | 479/907 [18:29<27:42,  3.88s/it]

training loss: 2.8816471099853516
training loss: 2.8979570865631104


training:  53%|███████████████████████████████████████████████████████████████▎                                                        | 479/907 [18:40<27:42,  3.88s/it]

training loss: 2.893857955932617


training:  53%|███████████████████████████████████████████████████████████████▋                                                        | 481/907 [18:41<31:31,  4.44s/it]

validation loss: 2.905850648880005
training loss: 2.871314525604248
training loss: 2.859135866165161


training:  53%|████████████████████████████████████████████████████████████████                                                        | 484/907 [18:53<30:51,  4.38s/it]

training loss: 2.860646963119507
training loss: 2.870891571044922
training loss: 2.8786888122558594


training:  54%|████████████████████████████████████████████████████████████████▍                                                       | 487/907 [19:08<31:44,  4.54s/it]

training loss: 2.896291494369507
training loss: 2.8925535678863525
training loss: 2.836721420288086


training:  54%|████████████████████████████████████████████████████████████████▊                                                       | 490/907 [19:23<32:41,  4.70s/it]

training loss: 2.8984439373016357
training loss: 2.8582687377929688


training:  54%|█████████████████████████████████████████████████████████████████                                                       | 492/907 [19:34<34:00,  4.92s/it]

training loss: 2.8837718963623047
training loss: 2.883941650390625
validation loss: 2.8908615112304688


training:  54%|█████████████████████████████████████████████████████████████████▎                                                      | 494/907 [19:45<34:40,  5.04s/it]

training loss: 2.858071804046631
training loss: 2.851457118988037


training:  55%|█████████████████████████████████████████████████████████████████▌                                                      | 496/907 [19:56<35:28,  5.18s/it]

training loss: 2.898754835128784
training loss: 2.8634536266326904
training loss: 2.855520248413086


training:  55%|██████████████████████████████████████████████████████████████████                                                      | 499/907 [20:11<35:06,  5.16s/it]

training loss: 2.8537509441375732
training loss: 2.8785648345947266


training:  55%|██████████████████████████████████████████████████████████████████▎                                                     | 501/907 [20:22<35:09,  5.19s/it]

training loss: 2.8268802165985107
training loss: 2.896286964416504


training:  55%|██████████████████████████████████████████████████████████████████▌                                                     | 503/907 [20:32<34:36,  5.14s/it]

training loss: 2.915731430053711
training loss: 2.9185314178466797
training loss: 2.86215877532959
validation loss: 2.922386407852173
training loss: 2.8838071823120117


training:  56%|███████████████████████████████████████████████████████████████████                                                     | 507/907 [20:42<29:13,  4.38s/it]

training loss: 2.868546724319458
training loss: 2.9478397369384766
training loss: 2.845562696456909
training loss: 2.881486654281616
training loss: 2.885241985321045
training loss: 2.9351656436920166
training loss: 2.8708910942077637
training loss: 2.9202795028686523


training:  57%|████████████████████████████████████████████████████████████████████▏                                                   | 515/907 [20:52<22:34,  3.46s/it]

training loss: 2.9101176261901855
training loss: 2.836496353149414
training loss: 2.91650652885437
validation loss: 2.888563632965088
training loss: 2.9219329357147217
training loss: 2.8524274826049805
training loss: 2.8989362716674805
training loss: 2.9400899410247803
training loss: 2.9052951335906982


training:  58%|█████████████████████████████████████████████████████████████████████▏                                                  | 523/907 [21:05<18:33,  2.90s/it]

training loss: 2.851081609725952
training loss: 2.908957004547119
training loss: 2.8813862800598145
training loss: 2.8298563957214355
training loss: 2.836055040359497
training loss: 2.880836248397827
training loss: 2.909167528152466
validation loss: 2.8584437370300293
training loss: 2.9152519702911377


training:  59%|██████████████████████████████████████████████████████████████████████▎                                                 | 531/907 [21:16<15:10,  2.42s/it]

training loss: 2.8500618934631348
training loss: 2.879706859588623
training loss: 2.9130334854125977
training loss: 2.8998324871063232
training loss: 2.8627171516418457
training loss: 2.912038564682007
training loss: 2.8848962783813477
training loss: 2.9336459636688232


training:  59%|███████████████████████████████████████████████████████████████████████▎                                                | 539/907 [21:31<13:52,  2.26s/it]

training loss: 2.8787779808044434
training loss: 2.8698182106018066
training loss: 2.9086263179779053
validation loss: 2.880779266357422
training loss: 2.895552396774292


training:  60%|███████████████████████████████████████████████████████████████████████▊                                                | 543/907 [21:51<18:50,  3.11s/it]

training loss: 2.8443167209625244
training loss: 2.9161977767944336


training:  60%|████████████████████████████████████████████████████████████████████████                                                | 545/907 [22:02<22:29,  3.73s/it]

training loss: 2.889155149459839
training loss: 2.8950893878936768


training:  60%|████████████████████████████████████████████████████████████████████████▎                                               | 547/907 [22:12<24:49,  4.14s/it]

training loss: 2.875265598297119
training loss: 2.880950927734375
training loss: 2.8753132820129395
training loss: 2.8574979305267334
training loss: 2.8505284786224365


training:  61%|█████████████████████████████████████████████████████████████████████████                                               | 552/907 [22:23<21:03,  3.56s/it]

training loss: 2.910949945449829
training loss: 2.880234718322754
validation loss: 2.892779588699341
training loss: 2.8863823413848877
training loss: 2.8903486728668213


training:  61%|█████████████████████████████████████████████████████████████████████████▌                                              | 556/907 [22:43<23:34,  4.03s/it]

training loss: 2.893415927886963
training loss: 2.8487844467163086
training loss: 2.9121322631835938


training:  62%|█████████████████████████████████████████████████████████████████████████▉                                              | 559/907 [22:57<24:13,  4.18s/it]

training loss: 2.8975863456726074
training loss: 2.868427276611328
training loss: 2.8934433460235596


training:  62%|██████████████████████████████████████████████████████████████████████████▎                                             | 562/907 [23:10<24:07,  4.19s/it]

training loss: 2.8555099964141846
training loss: 2.894153594970703


training:  62%|██████████████████████████████████████████████████████████████████████████▌                                             | 564/907 [23:20<25:44,  4.50s/it]

training loss: 2.8792712688446045
training loss: 2.890664577484131
validation loss: 2.897091865539551
training loss: 2.843693494796753


training:  63%|███████████████████████████████████████████████████████████████████████████                                             | 567/907 [23:33<25:26,  4.49s/it]

training loss: 2.8980836868286133
training loss: 2.895920991897583
training loss: 2.876096725463867
training loss: 2.8937113285064697
training loss: 2.8559181690216064


training:  63%|███████████████████████████████████████████████████████████████████████████▋                                            | 572/907 [23:45<21:19,  3.82s/it]

training loss: 2.8870527744293213
training loss: 2.8969249725341797
training loss: 2.8467185497283936
training loss: 2.8720271587371826


training:  64%|████████████████████████████████████████████████████████████████████████████▏                                           | 576/907 [24:03<22:27,  4.07s/it]

training loss: 2.901853561401367
training loss: 2.8914101123809814
validation loss: 2.9463651180267334
training loss: 2.9117090702056885


training:  64%|████████████████████████████████████████████████████████████████████████████▌                                           | 579/907 [24:20<24:26,  4.47s/it]

training loss: 2.87294864654541
training loss: 2.8346147537231445


training:  64%|████████████████████████████████████████████████████████████████████████████▊                                           | 581/907 [24:31<26:11,  4.82s/it]

training loss: 2.8545286655426025
training loss: 2.9028477668762207


training:  64%|█████████████████████████████████████████████████████████████████████████████▏                                          | 583/907 [24:41<26:49,  4.97s/it]

training loss: 2.8931632041931152
training loss: 2.8356707096099854


training:  64%|█████████████████████████████████████████████████████████████████████████████▍                                          | 585/907 [24:51<26:46,  4.99s/it]

training loss: 2.8427774906158447
training loss: 2.89929461479187


training:  65%|█████████████████████████████████████████████████████████████████████████████▋                                          | 587/907 [25:02<27:10,  5.10s/it]

training loss: 2.8517885208129883
training loss: 2.885307788848877
training loss: 2.8514273166656494
validation loss: 2.8419013023376465


training:  65%|██████████████████████████████████████████████████████████████████████████████                                          | 590/907 [25:15<25:41,  4.86s/it]

training loss: 2.865323305130005
training loss: 2.8134171962738037
training loss: 2.933974504470825


training:  65%|██████████████████████████████████████████████████████████████████████████████▍                                         | 593/907 [25:28<24:19,  4.65s/it]

training loss: 2.8792192935943604
training loss: 2.876804828643799
training loss: 2.88598370552063


training:  66%|██████████████████████████████████████████████████████████████████████████████▊                                         | 596/907 [25:41<23:47,  4.59s/it]

training loss: 2.9031829833984375
training loss: 2.8483994007110596
training loss: 2.8856618404388428


training:  66%|███████████████████████████████████████████████████████████████████████████████▎                                        | 599/907 [25:54<23:20,  4.55s/it]

training loss: 2.883840322494507
training loss: 2.902817487716675
training loss: 2.895336627960205
validation loss: 2.8573052883148193


training:  66%|███████████████████████████████████████████████████████████████████████████████▋                                        | 602/907 [26:10<24:05,  4.74s/it]

training loss: 2.8719911575317383
training loss: 2.8657031059265137
training loss: 2.8521323204040527


training:  67%|████████████████████████████████████████████████████████████████████████████████                                        | 605/907 [26:24<24:01,  4.77s/it]

training loss: 2.870455741882324
training loss: 2.8873636722564697
training loss: 2.839672327041626


training:  67%|████████████████████████████████████████████████████████████████████████████████▍                                       | 608/907 [26:40<24:19,  4.88s/it]

training loss: 2.8800811767578125
training loss: 2.817471981048584
training loss: 2.8741159439086914


training:  67%|████████████████████████████████████████████████████████████████████████████████▊                                       | 611/907 [26:54<24:03,  4.88s/it]

training loss: 2.920530319213867
training loss: 2.8737430572509766
training loss: 2.906667470932007
validation loss: 2.9331138134002686


training:  68%|█████████████████████████████████████████████████████████████████████████████████▏                                      | 614/907 [27:08<23:24,  4.79s/it]

training loss: 2.859630584716797
training loss: 2.869201898574829
training loss: 2.9347262382507324


training:  68%|█████████████████████████████████████████████████████████████████████████████████▋                                      | 617/907 [27:22<22:49,  4.72s/it]

training loss: 2.8871805667877197
training loss: 2.85536527633667
training loss: 2.9524872303009033


training:  68%|██████████████████████████████████████████████████████████████████████████████████                                      | 620/907 [27:35<22:16,  4.66s/it]

training loss: 2.9555747509002686
training loss: 2.8996691703796387
training loss: 2.8757684230804443


training:  69%|██████████████████████████████████████████████████████████████████████████████████▍                                     | 623/907 [27:49<21:50,  4.61s/it]

training loss: 2.8588736057281494
training loss: 2.867924213409424
training loss: 2.8502917289733887
validation loss: 2.8635408878326416


training:  69%|██████████████████████████████████████████████████████████████████████████████████▊                                     | 626/907 [28:03<21:38,  4.62s/it]

training loss: 2.8993470668792725
training loss: 2.918877601623535
training loss: 2.9074554443359375


training:  69%|███████████████████████████████████████████████████████████████████████████████████▏                                    | 629/907 [28:16<21:16,  4.59s/it]

training loss: 2.888015031814575
training loss: 2.8698487281799316
training loss: 2.851712942123413


training:  70%|███████████████████████████████████████████████████████████████████████████████████▌                                    | 632/907 [28:30<20:56,  4.57s/it]

training loss: 2.8831517696380615
training loss: 2.8957526683807373
training loss: 2.8421242237091064


training:  70%|████████████████████████████████████████████████████████████████████████████████████                                    | 635/907 [28:44<20:38,  4.55s/it]

training loss: 2.879925489425659
training loss: 2.8905279636383057
training loss: 2.87764835357666
validation loss: 2.8740246295928955


training:  70%|████████████████████████████████████████████████████████████████████████████████████▍                                   | 638/907 [28:57<20:32,  4.58s/it]

training loss: 2.8563790321350098
training loss: 2.915262222290039
training loss: 2.9800281524658203


training:  71%|████████████████████████████████████████████████████████████████████████████████████▊                                   | 641/907 [29:11<20:13,  4.56s/it]

training loss: 2.911316394805908
training loss: 2.881932258605957
training loss: 2.8568215370178223


training:  71%|█████████████████████████████████████████████████████████████████████████████████████▏                                  | 644/907 [29:25<19:57,  4.55s/it]

training loss: 2.8359568119049072
training loss: 2.870682716369629
training loss: 2.8915231227874756


training:  71%|█████████████████████████████████████████████████████████████████████████████████████▌                                  | 647/907 [29:38<19:41,  4.54s/it]

training loss: 2.8941659927368164
training loss: 2.8152472972869873
training loss: 2.8748090267181396
validation loss: 2.847166061401367


training:  72%|█████████████████████████████████████████████████████████████████████████████████████▉                                  | 650/907 [29:52<19:34,  4.57s/it]

training loss: 2.879742383956909
training loss: 2.8657350540161133
training loss: 2.9293296337127686


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▍                                 | 653/907 [30:06<19:17,  4.56s/it]

training loss: 2.8801350593566895
training loss: 2.8586807250976562
training loss: 2.949192762374878


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▊                                 | 656/907 [30:19<19:00,  4.55s/it]

training loss: 2.9184377193450928
training loss: 2.8668854236602783
training loss: 2.9088902473449707


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▏                                | 659/907 [30:33<18:45,  4.54s/it]

training loss: 2.86435604095459
training loss: 2.927790641784668
training loss: 2.9255640506744385
validation loss: 2.8766393661499023


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▌                                | 662/907 [30:47<18:39,  4.57s/it]

training loss: 2.862375497817993
training loss: 2.8110101222991943
training loss: 2.846287727355957


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▉                                | 665/907 [31:00<18:21,  4.55s/it]

training loss: 2.9181079864501953
training loss: 2.892242908477783
training loss: 2.897073745727539


training:  74%|████████████████████████████████████████████████████████████████████████████████████████▍                               | 668/907 [31:14<18:05,  4.54s/it]

training loss: 2.8840503692626953
training loss: 2.915344715118408
training loss: 2.8624744415283203


training:  74%|████████████████████████████████████████████████████████████████████████████████████████▊                               | 671/907 [31:27<17:49,  4.53s/it]

training loss: 2.8690707683563232
training loss: 2.851309061050415
training loss: 2.8464770317077637
validation loss: 2.8938019275665283


training:  74%|█████████████████████████████████████████████████████████████████████████████████████████▏                              | 674/907 [31:41<17:42,  4.56s/it]

training loss: 2.919443368911743
training loss: 2.8900952339172363
training loss: 2.8734560012817383


training:  75%|█████████████████████████████████████████████████████████████████████████████████████████▌                              | 677/907 [31:55<17:25,  4.55s/it]

training loss: 2.893939733505249
training loss: 2.87458872795105
training loss: 2.9037742614746094


training:  75%|█████████████████████████████████████████████████████████████████████████████████████████▉                              | 680/907 [32:08<17:09,  4.54s/it]

training loss: 2.900723695755005
training loss: 2.9050073623657227
training loss: 2.84314227104187


training:  75%|██████████████████████████████████████████████████████████████████████████████████████████▎                             | 683/907 [32:22<16:54,  4.53s/it]

training loss: 2.882084846496582
training loss: 2.8913497924804688
training loss: 2.9047110080718994
validation loss: 2.893441677093506


training:  76%|██████████████████████████████████████████████████████████████████████████████████████████▊                             | 686/907 [32:36<16:48,  4.56s/it]

training loss: 2.903806447982788
training loss: 2.886535882949829
training loss: 2.844341278076172


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████▏                            | 689/907 [32:49<16:26,  4.52s/it]

training loss: 2.835916757583618
training loss: 2.8848876953125
training loss: 2.8792991638183594


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████▌                            | 692/907 [33:02<16:07,  4.50s/it]

training loss: 2.8577523231506348
training loss: 2.897575855255127
training loss: 2.9259963035583496


training:  77%|███████████████████████████████████████████████████████████████████████████████████████████▉                            | 695/907 [33:16<15:55,  4.51s/it]

training loss: 2.9331231117248535
training loss: 2.8230772018432617
training loss: 2.880519151687622
validation loss: 2.852851152420044


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▎                           | 698/907 [33:30<15:49,  4.55s/it]

training loss: 2.8950135707855225
training loss: 2.8644986152648926
training loss: 2.8411920070648193


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▋                           | 701/907 [33:43<15:33,  4.53s/it]

training loss: 2.913834810256958
training loss: 2.877959966659546
training loss: 2.917069673538208


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▏                          | 704/907 [33:57<15:18,  4.52s/it]

training loss: 2.8630762100219727
training loss: 2.8860182762145996
training loss: 2.833221673965454


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▌                          | 707/907 [34:10<15:03,  4.52s/it]

training loss: 2.849482774734497
training loss: 2.9051904678344727
training loss: 2.8894457817077637
validation loss: 2.927018165588379


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▉                          | 710/907 [34:24<14:57,  4.55s/it]

training loss: 2.9095962047576904
training loss: 2.855168342590332
training loss: 2.90022873878479


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▎                         | 713/907 [34:38<14:41,  4.54s/it]

training loss: 2.89271879196167
training loss: 2.861539602279663
training loss: 2.9456593990325928


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▋                         | 716/907 [34:51<14:25,  4.53s/it]

training loss: 2.8960862159729004
training loss: 2.850437641143799
training loss: 2.8655316829681396


training:  79%|███████████████████████████████████████████████████████████████████████████████████████████████▏                        | 719/907 [35:05<14:10,  4.53s/it]

training loss: 2.874885082244873
training loss: 2.844062089920044
training loss: 2.9258179664611816
validation loss: 2.8751583099365234


training:  80%|███████████████████████████████████████████████████████████████████████████████████████████████▌                        | 722/907 [35:19<14:03,  4.56s/it]

training loss: 2.8241236209869385
training loss: 2.860442876815796
training loss: 2.880701780319214


training:  80%|███████████████████████████████████████████████████████████████████████████████████████████████▉                        | 725/907 [35:32<13:47,  4.55s/it]

training loss: 2.935576915740967
training loss: 2.8823883533477783
training loss: 2.8421730995178223


training:  80%|████████████████████████████████████████████████████████████████████████████████████████████████▎                       | 728/907 [35:46<13:31,  4.53s/it]

training loss: 2.881112813949585
training loss: 2.8794612884521484
training loss: 2.8874807357788086


training:  81%|████████████████████████████████████████████████████████████████████████████████████████████████▋                       | 731/907 [35:59<13:16,  4.53s/it]

training loss: 2.8884544372558594
training loss: 2.8748934268951416
training loss: 2.871425151824951
validation loss: 2.8360869884490967


training:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████                       | 734/907 [36:13<13:09,  4.56s/it]

training loss: 2.8859360218048096
training loss: 2.848968982696533
training loss: 2.900202512741089


training:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████▌                      | 737/907 [36:27<12:52,  4.55s/it]

training loss: 2.8958678245544434
training loss: 2.8865396976470947
training loss: 2.8781216144561768


training:  82%|█████████████████████████████████████████████████████████████████████████████████████████████████▉                      | 740/907 [36:40<12:37,  4.54s/it]

training loss: 2.939389228820801
training loss: 2.9038093090057373
training loss: 2.826831102371216


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▎                     | 743/907 [36:54<12:23,  4.53s/it]

training loss: 2.887540102005005
training loss: 2.860321044921875
training loss: 2.8806135654449463
validation loss: 2.879859447479248


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▋                     | 746/907 [37:08<12:14,  4.56s/it]

training loss: 2.8572843074798584
training loss: 2.8230090141296387
training loss: 2.8683931827545166


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████                     | 749/907 [37:21<11:58,  4.55s/it]

training loss: 2.897517681121826
training loss: 2.851323127746582
training loss: 2.9547793865203857


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▍                    | 752/907 [37:35<11:43,  4.54s/it]

training loss: 2.8668789863586426
training loss: 2.912353754043579
training loss: 2.8638150691986084


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▉                    | 755/907 [37:48<11:28,  4.53s/it]

training loss: 2.883880853652954
training loss: 2.894017457962036
training loss: 2.826756238937378
validation loss: 2.871279716491699


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▎                   | 758/907 [38:02<11:20,  4.57s/it]

training loss: 2.8765408992767334
training loss: 2.9116127490997314
training loss: 2.8661701679229736


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▋                   | 761/907 [38:16<11:04,  4.55s/it]

training loss: 2.8577229976654053
training loss: 2.9191367626190186
training loss: 2.9109604358673096


training:  84%|█████████████████████████████████████████████████████████████████████████████████████████████████████                   | 764/907 [38:30<10:49,  4.54s/it]

training loss: 2.8909952640533447
training loss: 2.8654556274414062
training loss: 2.8634390830993652


training:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████▍                  | 767/907 [38:43<10:34,  4.53s/it]

training loss: 2.8334743976593018
training loss: 2.891731023788452
training loss: 2.8987574577331543
validation loss: 2.9038124084472656


training:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████▊                  | 770/907 [38:57<10:25,  4.56s/it]

training loss: 2.8478760719299316
training loss: 2.87601900100708
training loss: 2.873746871948242


training:  85%|██████████████████████████████████████████████████████████████████████████████████████████████████████▎                 | 773/907 [39:11<10:10,  4.55s/it]

training loss: 2.8761532306671143
training loss: 2.879194736480713
training loss: 2.933920383453369


training:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋                 | 776/907 [39:24<09:55,  4.54s/it]

training loss: 2.8945364952087402
training loss: 2.9095630645751953
training loss: 2.8384196758270264


training:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████                 | 779/907 [39:38<09:40,  4.53s/it]

training loss: 2.8926916122436523
training loss: 2.8937325477600098
training loss: 2.87843918800354
validation loss: 2.868037700653076


training:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍                | 782/907 [39:52<09:30,  4.56s/it]

training loss: 2.8849940299987793
training loss: 2.8757951259613037
training loss: 2.869260549545288


training:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████▊                | 785/907 [40:05<09:15,  4.55s/it]

training loss: 2.900930643081665
training loss: 2.848928928375244
training loss: 2.8208305835723877


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▎               | 788/907 [40:19<09:00,  4.54s/it]

training loss: 2.8417775630950928
training loss: 2.8564834594726562
training loss: 2.8488266468048096


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋               | 791/907 [40:32<08:45,  4.53s/it]

training loss: 2.8351001739501953
training loss: 2.911334991455078
training loss: 2.959150552749634
validation loss: 2.9701242446899414


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████               | 794/907 [40:46<08:35,  4.56s/it]

training loss: 2.875399112701416
training loss: 2.8305299282073975
training loss: 2.848696231842041


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▍              | 797/907 [41:00<08:20,  4.55s/it]

training loss: 2.8776628971099854
training loss: 2.8492822647094727
training loss: 2.890481472015381


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊              | 800/907 [41:13<08:05,  4.54s/it]

training loss: 2.8566596508026123
training loss: 2.8324368000030518
training loss: 2.858917713165283


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▏             | 803/907 [41:27<07:51,  4.53s/it]

training loss: 2.881067991256714
training loss: 2.887155532836914
training loss: 2.8256514072418213
validation loss: 2.8836684226989746


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▋             | 806/907 [41:41<07:41,  4.57s/it]

training loss: 2.8433477878570557
training loss: 2.867330551147461
training loss: 2.889028310775757


training:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████             | 809/907 [41:54<07:25,  4.55s/it]

training loss: 2.917426586151123
training loss: 2.884826421737671
training loss: 2.86376690864563


training:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍            | 812/907 [42:07<07:07,  4.50s/it]

training loss: 2.8739981651306152
training loss: 2.8588201999664307
training loss: 2.928274393081665


training:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▊            | 815/907 [42:21<06:54,  4.50s/it]

training loss: 2.9205963611602783
training loss: 2.8660757541656494
training loss: 2.8822736740112305
validation loss: 2.9171080589294434


training:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▏           | 818/907 [42:35<06:44,  4.55s/it]

training loss: 2.868997097015381
training loss: 2.8849613666534424
training loss: 2.8949129581451416


training:  91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▌           | 821/907 [42:48<06:29,  4.53s/it]

training loss: 2.889744520187378
training loss: 2.8857977390289307
training loss: 2.8942582607269287


training:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████           | 824/907 [43:02<06:15,  4.53s/it]

training loss: 2.941317081451416
training loss: 2.8748674392700195
training loss: 2.8803632259368896


training:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▍          | 827/907 [43:15<06:01,  4.52s/it]

training loss: 2.904575824737549
training loss: 2.93295955657959
training loss: 2.855548858642578
validation loss: 2.863267183303833


training:  92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▊          | 830/907 [43:29<05:50,  4.56s/it]

training loss: 2.848625898361206
training loss: 2.880112409591675
training loss: 2.922576665878296


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▏         | 833/907 [43:43<05:36,  4.54s/it]

training loss: 2.888126850128174
training loss: 2.996060609817505
training loss: 2.85243558883667


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▌         | 836/907 [43:56<05:22,  4.54s/it]

training loss: 2.889340400695801
training loss: 2.898106098175049
training loss: 2.920578956604004


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████         | 839/907 [44:10<05:08,  4.54s/it]

training loss: 2.9005675315856934
training loss: 2.999135971069336
training loss: 2.9038641452789307
validation loss: 2.9131860733032227


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍        | 842/907 [44:24<04:57,  4.57s/it]

training loss: 2.8862547874450684
training loss: 2.9561996459960938
training loss: 2.868407726287842


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▊        | 845/907 [44:38<04:42,  4.56s/it]

training loss: 2.886780261993408
training loss: 2.8879072666168213
training loss: 2.9045486450195312


training:  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏       | 848/907 [44:51<04:28,  4.54s/it]

training loss: 2.8896164894104004
training loss: 2.8749887943267822
training loss: 2.8953843116760254


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌       | 851/907 [45:05<04:13,  4.53s/it]

training loss: 2.9019036293029785
training loss: 2.8705573081970215
training loss: 2.866406202316284
validation loss: 2.898745059967041


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉       | 854/907 [45:19<04:02,  4.57s/it]

training loss: 2.881739616394043
training loss: 2.832988977432251
training loss: 2.8588547706604004


training:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍      | 857/907 [45:32<03:47,  4.55s/it]

training loss: 2.876213312149048
training loss: 2.88051176071167
training loss: 2.9076247215270996


training:  95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊      | 860/907 [45:46<03:33,  4.54s/it]

training loss: 2.900827169418335
training loss: 2.8717434406280518
training loss: 2.9030473232269287


training:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏     | 863/907 [45:59<03:18,  4.52s/it]

training loss: 2.8746142387390137
training loss: 2.8764700889587402
training loss: 2.881572961807251
validation loss: 2.8673605918884277


training:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌     | 866/907 [46:14<03:10,  4.66s/it]

training loss: 2.831545829772949
training loss: 2.905294179916382
training loss: 2.866243600845337


training:  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉     | 869/907 [46:29<03:00,  4.74s/it]

training loss: 2.8871240615844727
training loss: 2.875664710998535
training loss: 2.8661208152770996


training:  96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎    | 872/907 [46:43<02:45,  4.72s/it]

training loss: 2.918262481689453
training loss: 2.899786949157715
training loss: 2.8804235458374023


training:  96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊    | 875/907 [46:56<02:28,  4.64s/it]

training loss: 2.902435064315796
training loss: 2.8663196563720703
training loss: 2.840027332305908
validation loss: 2.9133780002593994


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 878/907 [47:13<02:22,  4.92s/it]

training loss: 2.8791210651397705
training loss: 2.8614113330841064


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍   | 880/907 [47:23<02:13,  4.96s/it]

training loss: 2.90397310256958
training loss: 2.862165927886963
training loss: 2.8446106910705566


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊   | 883/907 [47:36<01:54,  4.75s/it]

training loss: 2.8598296642303467
training loss: 2.848392963409424
training loss: 2.8825151920318604


training:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏  | 886/907 [47:48<01:35,  4.56s/it]

training loss: 2.89192795753479
training loss: 2.913593053817749
training loss: 2.8784685134887695


training:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏  | 886/907 [48:00<01:35,  4.56s/it]

training loss: 2.856696128845215


training:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌  | 889/907 [48:01<01:21,  4.52s/it]

validation loss: 2.855607271194458
training loss: 2.886885404586792
training loss: 2.8609583377838135


training:  98%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████  | 892/907 [48:14<01:06,  4.43s/it]

training loss: 2.8759799003601074
training loss: 2.862726926803589
training loss: 2.935170888900757


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 895/907 [48:27<00:53,  4.43s/it]

training loss: 2.8822028636932373
training loss: 2.8571290969848633
training loss: 2.8269784450531006


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 898/907 [48:39<00:37,  4.22s/it]

training loss: 2.874829053878784
training loss: 2.8713080883026123
training loss: 2.9067089557647705


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 898/907 [48:50<00:37,  4.22s/it]

training loss: 2.891331434249878


training:  99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 901/907 [48:51<00:25,  4.22s/it]

validation loss: 2.895946741104126
training loss: 2.8725814819335938
training loss: 2.8670997619628906


training: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌| 904/907 [49:03<00:12,  4.17s/it]

training loss: 2.8767988681793213
training loss: 2.8500494956970215
training loss: 2.858959674835205


training: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 907/907 [49:17<00:00,  3.26s/it]
training:   0%|                                                                                                                                  | 0/907 [00:00<?, ?it/s]

training loss: 2.8723864555358887
training loss: 2.8806915283203125
validation loss: 2.8677709102630615


training:   0%|▎                                                                                                                       | 2/907 [00:11<1:28:14,  5.85s/it]

training loss: 2.95686411857605
training loss: 2.837867021560669


training:   0%|▌                                                                                                                       | 4/907 [00:22<1:26:51,  5.77s/it]

training loss: 2.909059762954712
training loss: 2.8294880390167236


training:   1%|▊                                                                                                                       | 6/907 [00:33<1:25:31,  5.70s/it]

training loss: 2.8627936840057373
training loss: 2.8789477348327637


training:   1%|█                                                                                                                       | 8/907 [00:44<1:23:45,  5.59s/it]

training loss: 2.872927665710449
training loss: 2.884885549545288
training loss: 2.8832900524139404


training:   1%|█▍                                                                                                                     | 11/907 [00:56<1:15:47,  5.08s/it]

training loss: 2.8512632846832275
training loss: 2.8959813117980957
training loss: 2.8815195560455322
validation loss: 2.9196534156799316
training loss: 2.9164183139801025
training loss: 2.8880603313446045
training loss: 2.8580379486083984
training loss: 2.8960695266723633
training loss: 2.9214115142822266


training:   2%|██▌                                                                                                                      | 19/907 [01:06<58:17,  3.94s/it]

training loss: 2.8548967838287354
training loss: 2.82011079788208
training loss: 2.877761125564575
training loss: 2.886976957321167
training loss: 2.904635429382324


training:   3%|███▏                                                                                                                     | 24/907 [01:22<54:22,  3.70s/it]

training loss: 2.89713454246521
training loss: 2.906984329223633
validation loss: 2.9113857746124268
training loss: 2.886327028274536
training loss: 2.8924474716186523


training:   3%|███▋                                                                                                                     | 28/907 [01:39<57:14,  3.91s/it]

training loss: 2.8762502670288086
training loss: 2.8260693550109863
training loss: 2.842045307159424


training:   3%|████                                                                                                                   | 31/907 [01:53<1:00:04,  4.11s/it]

training loss: 2.887576103210449
training loss: 2.89847731590271
training loss: 2.8955366611480713


training:   4%|████▍                                                                                                                  | 34/907 [02:07<1:02:35,  4.30s/it]

training loss: 2.841635227203369
training loss: 2.891977310180664
training loss: 2.902672052383423


training:   4%|████▍                                                                                                                  | 34/907 [02:19<1:02:35,  4.30s/it]

training loss: 2.887831926345825


training:   4%|████▊                                                                                                                  | 37/907 [02:23<1:06:39,  4.60s/it]

validation loss: 2.8596699237823486
training loss: 2.856755495071411
training loss: 2.90046763420105


training:   4%|█████▏                                                                                                                 | 40/907 [02:38<1:08:08,  4.72s/it]

training loss: 2.852917194366455
training loss: 2.855360507965088
training loss: 2.93693470954895


training:   5%|█████▋                                                                                                                 | 43/907 [02:53<1:08:42,  4.77s/it]

training loss: 2.8750293254852295
training loss: 2.8714497089385986
training loss: 2.86580753326416


training:   5%|██████                                                                                                                 | 46/907 [03:07<1:08:34,  4.78s/it]

training loss: 2.8412692546844482
training loss: 2.8773274421691895
training loss: 2.890835762023926


training:   5%|██████                                                                                                                 | 46/907 [03:20<1:08:34,  4.78s/it]

training loss: 2.8902807235717773


training:   5%|██████▍                                                                                                                | 49/907 [03:22<1:09:06,  4.83s/it]

validation loss: 2.8475120067596436
training loss: 2.909929037094116
training loss: 2.8966917991638184


training:   6%|██████▊                                                                                                                | 52/907 [03:36<1:07:50,  4.76s/it]

training loss: 2.8526382446289062
training loss: 2.8575639724731445
training loss: 2.894542694091797


training:   6%|███████▏                                                                                                               | 55/907 [03:50<1:07:54,  4.78s/it]

training loss: 2.9162302017211914
training loss: 2.828655958175659
training loss: 2.916799783706665


training:   6%|███████▌                                                                                                               | 58/907 [04:05<1:08:31,  4.84s/it]

training loss: 2.9025230407714844
training loss: 2.860388994216919
training loss: 2.8598239421844482


training:   6%|███████▌                                                                                                               | 58/907 [04:20<1:08:31,  4.84s/it]

training loss: 2.8531134128570557


training:   7%|████████                                                                                                               | 61/907 [04:21<1:09:58,  4.96s/it]

validation loss: 2.8856046199798584
training loss: 2.882751703262329
training loss: 2.8623037338256836


training:   7%|████████▍                                                                                                              | 64/907 [04:36<1:09:07,  4.92s/it]

training loss: 2.8762948513031006
training loss: 2.8840980529785156
training loss: 2.8441407680511475


training:   7%|████████▊                                                                                                              | 67/907 [04:50<1:08:00,  4.86s/it]

training loss: 2.8389554023742676
training loss: 2.925360918045044
training loss: 2.83066463470459


training:   8%|█████████▏                                                                                                             | 70/907 [05:03<1:05:51,  4.72s/it]

training loss: 2.8840041160583496
training loss: 2.9027979373931885
training loss: 2.887483596801758
training loss: 2.842219114303589


training:   8%|█████████▌                                                                                                             | 73/907 [05:13<1:00:20,  4.34s/it]

validation loss: 2.8864753246307373
training loss: 2.8873181343078613
training loss: 2.8548431396484375


training:   8%|██████████▏                                                                                                              | 76/907 [05:25<58:22,  4.22s/it]

training loss: 2.886545419692993
training loss: 2.8370754718780518
training loss: 2.8563761711120605


training:   9%|██████████▌                                                                                                              | 79/907 [05:38<58:11,  4.22s/it]

training loss: 2.8642330169677734
training loss: 2.8368842601776123
training loss: 2.930774450302124


training:   9%|██████████▉                                                                                                              | 82/907 [05:50<57:13,  4.16s/it]

training loss: 2.8486502170562744
training loss: 2.88303804397583
training loss: 2.908520460128784
training loss: 2.886392593383789


training:   9%|███████████▎                                                                                                             | 85/907 [06:04<59:58,  4.38s/it]

validation loss: 2.8417108058929443
training loss: 2.887148380279541
training loss: 2.8702659606933594


training:  10%|███████████▋                                                                                                             | 88/907 [06:15<56:26,  4.13s/it]

training loss: 2.9245827198028564
training loss: 2.879877805709839
training loss: 2.8578925132751465


training:  10%|████████████▏                                                                                                            | 91/907 [06:27<55:22,  4.07s/it]

training loss: 2.8821358680725098
training loss: 2.845935106277466
training loss: 2.9146978855133057


training:  10%|████████████▌                                                                                                            | 94/907 [06:41<57:53,  4.27s/it]

training loss: 2.9199459552764893
training loss: 2.886206865310669
training loss: 2.8400230407714844
training loss: 2.8751261234283447


training:  11%|████████████▋                                                                                                          | 97/907 [06:56<1:00:20,  4.47s/it]

validation loss: 2.854792594909668
training loss: 2.8069920539855957
training loss: 2.886305332183838


training:  11%|█████████████                                                                                                         | 100/907 [07:09<1:00:05,  4.47s/it]

training loss: 2.8920791149139404
training loss: 2.8735663890838623
training loss: 2.8941662311553955


training:  11%|█████████████▍                                                                                                        | 103/907 [07:23<1:00:38,  4.53s/it]

training loss: 2.85788631439209
training loss: 2.8667521476745605
training loss: 2.8781991004943848


training:  12%|██████████████                                                                                                          | 106/907 [07:34<56:14,  4.21s/it]

training loss: 2.8814384937286377
training loss: 2.9316887855529785
training loss: 2.8935322761535645
training loss: 2.8642303943634033


training:  12%|██████████████▏                                                                                                       | 109/907 [07:50<1:00:17,  4.53s/it]

validation loss: 2.8621280193328857
training loss: 2.8970043659210205
training loss: 2.8353426456451416


training:  12%|██████████████▌                                                                                                       | 112/907 [08:03<1:00:22,  4.56s/it]

training loss: 2.894551992416382
training loss: 2.850491762161255
training loss: 2.8657824993133545


training:  13%|██████████████▉                                                                                                       | 115/907 [08:19<1:03:09,  4.78s/it]

training loss: 2.8876492977142334
training loss: 2.890836715698242


training:  13%|███████████████▏                                                                                                      | 117/907 [08:31<1:06:32,  5.05s/it]

training loss: 2.8743062019348145
training loss: 2.9030299186706543
training loss: 2.852228879928589


training:  13%|███████████████▌                                                                                                      | 120/907 [08:44<1:04:07,  4.89s/it]

training loss: 2.868926763534546
training loss: 2.8629987239837646
validation loss: 2.8747620582580566
training loss: 2.867826461791992


training:  14%|████████████████                                                                                                      | 123/907 [08:58<1:03:09,  4.83s/it]

training loss: 2.903529405593872
training loss: 2.8499205112457275
training loss: 2.8958120346069336


training:  14%|████████████████▍                                                                                                     | 126/907 [09:11<1:00:45,  4.67s/it]

training loss: 2.9075303077697754
training loss: 2.884694814682007
training loss: 2.8749752044677734


training:  14%|████████████████▊                                                                                                     | 129/907 [09:25<1:00:06,  4.64s/it]

training loss: 2.908038377761841
training loss: 2.863194227218628
training loss: 2.9162003993988037


training:  15%|█████████████████▍                                                                                                      | 132/907 [09:38<59:21,  4.60s/it]

training loss: 2.8928451538085938
training loss: 2.8170924186706543
validation loss: 2.8544254302978516
training loss: 2.868539810180664


training:  15%|█████████████████▊                                                                                                      | 135/907 [09:52<59:15,  4.61s/it]

training loss: 2.888648509979248
training loss: 2.887176275253296
training loss: 2.879107713699341


training:  15%|██████████████████▎                                                                                                     | 138/907 [10:06<59:00,  4.60s/it]

training loss: 2.857402801513672
training loss: 2.8937129974365234
training loss: 2.8661346435546875
training loss: 2.9190595149993896
training loss: 2.886521339416504
training loss: 2.87221360206604


training:  16%|███████████████████                                                                                                     | 144/907 [10:18<48:33,  3.82s/it]

training loss: 2.871175527572632
training loss: 2.8646786212921143
validation loss: 2.880965232849121
training loss: 2.859175682067871


training:  16%|███████████████████▍                                                                                                    | 147/907 [10:34<53:39,  4.24s/it]

training loss: 2.899077892303467
training loss: 2.7864456176757812


training:  16%|███████████████████▋                                                                                                    | 149/907 [10:44<57:01,  4.51s/it]

training loss: 2.83414888381958
training loss: 2.9083077907562256
training loss: 2.8832767009735107


training:  17%|████████████████████                                                                                                    | 152/907 [10:58<57:48,  4.59s/it]

training loss: 2.823681592941284
training loss: 2.869765520095825
training loss: 2.8451426029205322
training loss: 2.8309781551361084


training:  17%|████████████████████▋                                                                                                   | 156/907 [11:09<50:10,  4.01s/it]

training loss: 2.869004726409912
training loss: 2.8798725605010986
validation loss: 2.872776746749878
training loss: 2.8417305946350098


training:  18%|█████████████████████                                                                                                   | 159/907 [11:22<51:05,  4.10s/it]

training loss: 2.8828349113464355
training loss: 2.8492417335510254
training loss: 2.909348726272583


training:  18%|█████████████████████▍                                                                                                  | 162/907 [11:32<48:06,  3.87s/it]

training loss: 2.8874244689941406
training loss: 2.8974528312683105
training loss: 2.873659372329712


training:  18%|█████████████████████▊                                                                                                  | 165/907 [11:47<52:12,  4.22s/it]

training loss: 2.8605446815490723
training loss: 2.855814218521118
training loss: 2.870746612548828


training:  19%|██████████████████████▏                                                                                                 | 168/907 [12:02<55:03,  4.47s/it]

training loss: 2.8732917308807373
training loss: 2.889981985092163
validation loss: 2.896980047225952


training:  19%|██████████████████████▍                                                                                                 | 170/907 [12:13<59:21,  4.83s/it]

training loss: 2.892671585083008
training loss: 2.894155263900757


training:  19%|██████████████████████▍                                                                                               | 172/907 [12:24<1:01:13,  5.00s/it]

training loss: 2.879079580307007
training loss: 2.86549973487854


training:  19%|██████████████████████▋                                                                                               | 174/907 [12:34<1:01:35,  5.04s/it]

training loss: 2.86417555809021
training loss: 2.880214214324951


training:  19%|██████████████████████▉                                                                                               | 176/907 [12:45<1:02:11,  5.10s/it]

training loss: 2.8565144538879395
training loss: 2.843388080596924


training:  20%|███████████████████████▏                                                                                              | 178/907 [12:56<1:04:07,  5.28s/it]

training loss: 2.8883190155029297
training loss: 2.874441385269165


training:  20%|███████████████████████▍                                                                                              | 180/907 [13:08<1:05:20,  5.39s/it]

training loss: 2.8498919010162354
training loss: 2.854449987411499
validation loss: 2.8464791774749756
training loss: 2.8687942028045654


training:  20%|███████████████████████▊                                                                                              | 183/907 [13:23<1:03:47,  5.29s/it]

training loss: 2.8701815605163574
training loss: 2.8898227214813232


training:  20%|████████████████████████                                                                                              | 185/907 [13:34<1:05:27,  5.44s/it]

training loss: 2.9083974361419678
training loss: 2.838184118270874


training:  21%|████████████████████████▎                                                                                             | 187/907 [13:45<1:05:43,  5.48s/it]

training loss: 2.924246311187744
training loss: 2.8738479614257812


training:  21%|████████████████████████▌                                                                                             | 189/907 [13:55<1:03:51,  5.34s/it]

training loss: 2.8700637817382812
training loss: 2.9444491863250732
training loss: 2.833932638168335
training loss: 2.886794328689575
training loss: 2.8878793716430664
validation loss: 2.907151937484741
training loss: 2.856328010559082


training:  21%|█████████████████████████▊                                                                                              | 195/907 [14:06<50:21,  4.24s/it]

training loss: 2.9080963134765625
training loss: 2.8694279193878174
training loss: 2.863358497619629
training loss: 2.862614631652832
training loss: 2.8818163871765137


training:  22%|██████████████████████████▍                                                                                             | 200/907 [14:23<47:04,  4.00s/it]

training loss: 2.9162328243255615
training loss: 2.857203483581543
training loss: 2.9031171798706055


training:  22%|██████████████████████████▊                                                                                             | 203/907 [14:35<47:32,  4.05s/it]

training loss: 2.926347255706787
training loss: 2.9041426181793213
training loss: 2.8480594158172607
validation loss: 2.86841082572937


training:  23%|███████████████████████████▎                                                                                            | 206/907 [14:48<47:52,  4.10s/it]

training loss: 2.896521806716919
training loss: 2.8450818061828613
training loss: 2.9522671699523926


training:  23%|███████████████████████████▋                                                                                            | 209/907 [15:01<48:26,  4.16s/it]

training loss: 2.8675105571746826
training loss: 2.8579869270324707
training loss: 2.855073928833008


training:  23%|████████████████████████████                                                                                            | 212/907 [15:13<48:23,  4.18s/it]

training loss: 2.905573844909668
training loss: 2.8349361419677734
training loss: 2.875328540802002


training:  24%|████████████████████████████▍                                                                                           | 215/907 [15:26<48:26,  4.20s/it]

training loss: 2.92303729057312
training loss: 2.896660566329956
training loss: 2.8533618450164795
validation loss: 2.9186086654663086


training:  24%|████████████████████████████▊                                                                                           | 218/907 [15:39<48:16,  4.20s/it]

training loss: 3.1470468044281006
training loss: 2.889678478240967
training loss: 2.900343418121338


training:  24%|█████████████████████████████▏                                                                                          | 221/907 [15:52<48:55,  4.28s/it]

training loss: 2.8832428455352783
training loss: 2.9306631088256836
training loss: 2.874176502227783


training:  25%|█████████████████████████████▋                                                                                          | 224/907 [16:06<49:44,  4.37s/it]

training loss: 2.8817594051361084
training loss: 2.826812267303467
training loss: 2.9133615493774414


training:  25%|██████████████████████████████                                                                                          | 227/907 [16:22<53:10,  4.69s/it]

training loss: 2.8754141330718994
training loss: 2.8882670402526855
training loss: 2.877598524093628
validation loss: 2.838893175125122
training loss: 2.870020866394043
training loss: 2.894770860671997


training:  26%|██████████████████████████████▋                                                                                         | 232/907 [16:33<44:01,  3.91s/it]

training loss: 2.891453742980957
training loss: 2.889988899230957
training loss: 2.8997137546539307
training loss: 2.8862602710723877
training loss: 2.8839879035949707
training loss: 2.885408878326416
training loss: 2.8977718353271484
training loss: 2.8579025268554688


training:  26%|███████████████████████████████▊                                                                                        | 240/907 [16:43<34:44,  3.12s/it]

training loss: 2.9058165550231934
training loss: 2.89070200920105
validation loss: 2.8863277435302734
training loss: 2.8532040119171143
training loss: 2.9244534969329834
training loss: 2.902346134185791
training loss: 2.9061949253082275
training loss: 2.8679935932159424
training loss: 2.8989367485046387


training:  27%|████████████████████████████████▊                                                                                       | 248/907 [16:58<30:16,  2.76s/it]

training loss: 2.8464999198913574
training loss: 2.896139144897461
training loss: 2.8888611793518066
training loss: 2.8653292655944824
training loss: 2.885887384414673


training:  27%|████████████████████████████████▊                                                                                       | 248/907 [17:10<30:16,  2.76s/it]

training loss: 2.854273557662964


training:  28%|█████████████████████████████████▍                                                                                      | 253/907 [17:10<28:52,  2.65s/it]

validation loss: 2.8624489307403564
training loss: 2.9166996479034424
training loss: 2.947448253631592
training loss: 2.8972272872924805
training loss: 2.841616153717041


training:  28%|██████████████████████████████████▏                                                                                     | 258/907 [17:32<34:00,  3.14s/it]

training loss: 2.8746073246002197
training loss: 2.94578218460083
training loss: 2.912290334701538


training:  29%|██████████████████████████████████▌                                                                                     | 261/907 [17:48<41:28,  3.85s/it]

training loss: 2.857600450515747
training loss: 2.835421085357666
training loss: 2.851951837539673


training:  29%|██████████████████████████████████▉                                                                                     | 264/907 [18:02<43:59,  4.10s/it]

training loss: 2.9157464504241943
training loss: 2.8992526531219482
validation loss: 2.922710418701172
training loss: 2.851762294769287
training loss: 2.9155635833740234


training:  30%|███████████████████████████████████▍                                                                                    | 268/907 [18:13<39:00,  3.66s/it]

training loss: 2.8642847537994385
training loss: 2.88395357131958
training loss: 2.8860623836517334
training loss: 2.8412437438964844


training:  30%|███████████████████████████████████▉                                                                                    | 272/907 [18:23<35:04,  3.31s/it]

training loss: 2.8663344383239746
training loss: 2.820343255996704
training loss: 2.9278337955474854
training loss: 2.875814199447632


training:  30%|████████████████████████████████████▌                                                                                   | 276/907 [18:43<40:20,  3.84s/it]

training loss: 2.8821730613708496
training loss: 2.887860059738159
validation loss: 2.826026439666748


training:  31%|████████████████████████████████████▊                                                                                   | 278/907 [18:55<46:31,  4.44s/it]

training loss: 2.897233009338379
training loss: 2.843151330947876


training:  31%|█████████████████████████████████████                                                                                   | 280/907 [19:06<49:52,  4.77s/it]

training loss: 2.8855929374694824
training loss: 2.8955957889556885
training loss: 2.882124900817871


training:  31%|█████████████████████████████████████▍                                                                                  | 283/907 [19:20<49:24,  4.75s/it]

training loss: 2.8576600551605225
training loss: 2.8779327869415283
training loss: 2.874000072479248


training:  32%|█████████████████████████████████████▊                                                                                  | 286/907 [19:34<48:25,  4.68s/it]

training loss: 2.84904146194458
training loss: 2.863799810409546
training loss: 2.884945869445801
training loss: 2.8426592350006104


training:  32%|██████████████████████████████████████▏                                                                                 | 289/907 [19:47<48:05,  4.67s/it]

validation loss: 2.8452775478363037
training loss: 2.8730409145355225
training loss: 2.8709874153137207


training:  32%|██████████████████████████████████████▋                                                                                 | 292/907 [20:01<46:59,  4.58s/it]

training loss: 2.8740386962890625
training loss: 2.824824094772339
training loss: 2.831237316131592


training:  33%|███████████████████████████████████████                                                                                 | 295/907 [20:13<45:10,  4.43s/it]

training loss: 2.933001756668091
training loss: 2.866415500640869
training loss: 2.8615505695343018


training:  33%|███████████████████████████████████████▍                                                                                | 298/907 [20:27<46:10,  4.55s/it]

training loss: 2.9338223934173584
training loss: 2.8861212730407715
training loss: 2.8635661602020264


training:  33%|███████████████████████████████████████▍                                                                                | 298/907 [20:40<46:10,  4.55s/it]

training loss: 2.944488525390625


training:  33%|███████████████████████████████████████▊                                                                                | 301/907 [20:43<48:27,  4.80s/it]

validation loss: 2.93633770942688
training loss: 2.859203338623047
training loss: 2.8704192638397217


training:  34%|████████████████████████████████████████▏                                                                               | 304/907 [20:58<48:19,  4.81s/it]

training loss: 2.8795626163482666
training loss: 2.886361598968506
training loss: 2.8668267726898193


training:  34%|████████████████████████████████████████▌                                                                               | 307/907 [21:13<49:04,  4.91s/it]

training loss: 2.8684287071228027
training loss: 2.90338134765625
training loss: 2.911388397216797


training:  34%|█████████████████████████████████████████                                                                               | 310/907 [21:28<48:51,  4.91s/it]

training loss: 2.899099111557007
training loss: 2.8802242279052734
training loss: 2.873386859893799


training:  34%|█████████████████████████████████████████                                                                               | 310/907 [21:40<48:51,  4.91s/it]

training loss: 2.84721040725708


training:  35%|█████████████████████████████████████████▍                                                                              | 313/907 [21:42<47:23,  4.79s/it]

validation loss: 2.872248411178589
training loss: 2.8586642742156982
training loss: 2.8599839210510254


training:  35%|█████████████████████████████████████████▊                                                                              | 316/907 [21:56<46:42,  4.74s/it]

training loss: 2.866879940032959
training loss: 2.85335373878479
training loss: 2.8811683654785156


training:  35%|██████████████████████████████████████████▏                                                                             | 319/907 [22:09<45:18,  4.62s/it]

training loss: 2.877800464630127
training loss: 2.8606152534484863
training loss: 2.905405282974243


training:  36%|██████████████████████████████████████████▌                                                                             | 322/907 [22:22<44:42,  4.59s/it]

training loss: 2.976928234100342
training loss: 2.9107046127319336
training loss: 2.872101068496704
training loss: 2.847687005996704


training:  36%|██████████████████████████████████████████▉                                                                             | 325/907 [22:36<44:23,  4.58s/it]

validation loss: 2.8994300365448
training loss: 2.878833293914795
training loss: 2.909008502960205


training:  36%|███████████████████████████████████████████▍                                                                            | 328/907 [22:49<43:17,  4.49s/it]

training loss: 2.8734183311462402
training loss: 2.9401133060455322
training loss: 2.8338963985443115


training:  36%|███████████████████████████████████████████▊                                                                            | 331/907 [23:01<42:28,  4.42s/it]

training loss: 2.8437676429748535
training loss: 2.879946708679199
training loss: 2.854668378829956


training:  37%|████████████████████████████████████████████▏                                                                           | 334/907 [23:14<41:41,  4.37s/it]

training loss: 2.940268039703369
training loss: 2.8823745250701904
training loss: 2.85939359664917
training loss: 2.9606566429138184


training:  37%|████████████████████████████████████████████▌                                                                           | 337/907 [23:27<41:21,  4.35s/it]

validation loss: 2.8566136360168457
training loss: 2.8936586380004883
training loss: 2.8716113567352295


training:  37%|████████████████████████████████████████████▉                                                                           | 340/907 [23:39<40:21,  4.27s/it]

training loss: 2.926434278488159
training loss: 2.8848981857299805
training loss: 2.8327949047088623


training:  38%|█████████████████████████████████████████████▍                                                                          | 343/907 [23:53<41:01,  4.36s/it]

training loss: 2.8908891677856445
training loss: 2.8721680641174316
training loss: 2.822765350341797


training:  38%|█████████████████████████████████████████████▊                                                                          | 346/907 [24:06<40:52,  4.37s/it]

training loss: 2.8494222164154053
training loss: 2.9103639125823975
training loss: 2.8876163959503174


training:  38%|█████████████████████████████████████████████▊                                                                          | 346/907 [24:20<40:52,  4.37s/it]

training loss: 2.897667169570923


training:  38%|██████████████████████████████████████████████▏                                                                         | 349/907 [24:20<41:46,  4.49s/it]

validation loss: 2.9129157066345215
training loss: 2.896474838256836
training loss: 2.8771679401397705


training:  39%|██████████████████████████████████████████████▌                                                                         | 352/907 [24:34<41:15,  4.46s/it]

training loss: 2.8905537128448486
training loss: 2.8614654541015625
training loss: 2.8780620098114014


training:  39%|██████████████████████████████████████████████▉                                                                         | 355/907 [24:47<41:27,  4.51s/it]

training loss: 2.8925600051879883
training loss: 2.9161109924316406
training loss: 2.878441572189331


training:  39%|███████████████████████████████████████████████▎                                                                        | 358/907 [25:01<41:09,  4.50s/it]

training loss: 2.8692643642425537
training loss: 2.8927602767944336
training loss: 2.8674066066741943
training loss: 2.901503801345825


training:  40%|███████████████████████████████████████████████▊                                                                        | 361/907 [25:15<41:00,  4.51s/it]

validation loss: 2.966235876083374
training loss: 2.9320900440216064
training loss: 2.8828046321868896


training:  40%|████████████████████████████████████████████████▏                                                                       | 364/907 [25:28<40:28,  4.47s/it]

training loss: 2.9215779304504395
training loss: 2.871053457260132
training loss: 2.9038591384887695


training:  40%|████████████████████████████████████████████████▌                                                                       | 367/907 [25:41<40:02,  4.45s/it]

training loss: 2.892068386077881
training loss: 2.8865139484405518
training loss: 2.8922624588012695


training:  41%|████████████████████████████████████████████████▉                                                                       | 370/907 [25:53<38:27,  4.30s/it]

training loss: 2.8487231731414795
training loss: 2.8301475048065186
training loss: 2.8729562759399414
training loss: 2.882448673248291


training:  41%|█████████████████████████████████████████████████▎                                                                      | 373/907 [26:07<39:36,  4.45s/it]

validation loss: 2.8793299198150635
training loss: 2.831987142562866
training loss: 2.8597006797790527


training:  41%|█████████████████████████████████████████████████▋                                                                      | 376/907 [26:21<39:35,  4.47s/it]

training loss: 2.9222021102905273
training loss: 2.854374408721924
training loss: 2.836827278137207


training:  42%|██████████████████████████████████████████████████▏                                                                     | 379/907 [26:33<38:48,  4.41s/it]

training loss: 2.8515968322753906
training loss: 2.8969876766204834
training loss: 2.872987985610962


training:  42%|██████████████████████████████████████████████████▌                                                                     | 382/907 [26:47<39:08,  4.47s/it]

training loss: 2.850409984588623
training loss: 2.89452862739563
training loss: 2.8925528526306152


training:  42%|██████████████████████████████████████████████████▌                                                                     | 382/907 [27:00<39:08,  4.47s/it]

training loss: 2.9280333518981934


training:  42%|██████████████████████████████████████████████████▉                                                                     | 385/907 [27:04<41:18,  4.75s/it]

validation loss: 2.9097633361816406
training loss: 2.896366596221924


training:  43%|███████████████████████████████████████████████████▏                                                                    | 387/907 [27:15<43:36,  5.03s/it]

training loss: 2.8490917682647705
training loss: 2.877406120300293


training:  43%|███████████████████████████████████████████████████▍                                                                    | 389/907 [27:27<46:34,  5.40s/it]

training loss: 2.899343967437744
training loss: 2.8845067024230957


training:  43%|███████████████████████████████████████████████████▋                                                                    | 391/907 [27:39<47:13,  5.49s/it]

training loss: 2.930011034011841
training loss: 2.911536693572998
training loss: 2.860905885696411
training loss: 2.905923366546631


training:  44%|████████████████████████████████████████████████████▎                                                                   | 395/907 [27:49<39:22,  4.61s/it]

training loss: 2.885195016860962
training loss: 2.8713035583496094
training loss: 2.9457149505615234
validation loss: 2.845780611038208
training loss: 2.899362087249756
training loss: 2.8385260105133057
training loss: 2.9060471057891846
training loss: 2.8725333213806152
training loss: 2.8274736404418945


training:  44%|█████████████████████████████████████████████████████▎                                                                  | 403/907 [28:01<30:56,  3.68s/it]

training loss: 2.884249448776245
training loss: 2.826738119125366
training loss: 2.8496153354644775
training loss: 2.8880574703216553
training loss: 2.9266538619995117
training loss: 2.8903541564941406


training:  45%|██████████████████████████████████████████████████████                                                                  | 409/907 [28:20<29:10,  3.51s/it]

training loss: 2.841649293899536
validation loss: 2.8660473823547363
training loss: 2.8632254600524902
training loss: 2.8450825214385986
training loss: 2.8631420135498047


training:  46%|██████████████████████████████████████████████████████▋                                                                 | 413/907 [28:37<31:04,  3.77s/it]

training loss: 2.890597343444824
training loss: 2.862889289855957
training loss: 2.8408608436584473


training:  46%|███████████████████████████████████████████████████████                                                                 | 416/907 [28:53<34:12,  4.18s/it]

training loss: 2.891606330871582
training loss: 2.843782901763916
training loss: 2.9104502201080322


training:  46%|███████████████████████████████████████████████████████▍                                                                | 419/907 [29:06<34:40,  4.26s/it]

training loss: 2.8948049545288086
training loss: 2.8856041431427
training loss: 2.8736050128936768
validation loss: 2.864835739135742


training:  47%|███████████████████████████████████████████████████████▊                                                                | 422/907 [29:19<34:35,  4.28s/it]

training loss: 2.8042707443237305
training loss: 2.852210283279419
training loss: 2.9193880558013916


training:  47%|████████████████████████████████████████████████████████▏                                                               | 425/907 [29:34<35:40,  4.44s/it]

training loss: 2.8654916286468506
training loss: 2.8497073650360107
training loss: 2.8808064460754395


training:  47%|████████████████████████████████████████████████████████▋                                                               | 428/907 [29:49<37:12,  4.66s/it]

training loss: 2.8660335540771484
training loss: 2.833432912826538
training loss: 2.8698508739471436


training:  48%|█████████████████████████████████████████████████████████                                                               | 431/907 [30:04<37:42,  4.75s/it]

training loss: 2.8903648853302
training loss: 2.85966420173645
training loss: 2.963106632232666
validation loss: 2.8621957302093506


training:  48%|█████████████████████████████████████████████████████████▍                                                              | 434/907 [30:21<39:25,  5.00s/it]

training loss: 2.9152872562408447
training loss: 2.902249813079834


training:  48%|█████████████████████████████████████████████████████████▋                                                              | 436/907 [30:32<40:16,  5.13s/it]

training loss: 2.8428335189819336
training loss: 2.868950128555298
training loss: 2.883131265640259


training:  48%|██████████████████████████████████████████████████████████                                                              | 439/907 [30:46<39:11,  5.02s/it]

training loss: 2.877521276473999
training loss: 2.866666078567505
training loss: 2.9209933280944824


training:  49%|██████████████████████████████████████████████████████████▍                                                             | 442/907 [31:01<38:59,  5.03s/it]

training loss: 2.8676748275756836
training loss: 2.8585469722747803
training loss: 2.9241080284118652
training loss: 2.898226022720337


training:  49%|██████████████████████████████████████████████████████████▉                                                             | 445/907 [31:18<40:13,  5.22s/it]

validation loss: 2.864867687225342
training loss: 2.846278429031372


training:  49%|███████████████████████████████████████████████████████████▏                                                            | 447/907 [31:30<41:39,  5.43s/it]

training loss: 2.8849103450775146
training loss: 2.9269657135009766


training:  50%|███████████████████████████████████████████████████████████▍                                                            | 449/907 [31:41<41:21,  5.42s/it]

training loss: 2.8766961097717285
training loss: 2.9085633754730225


training:  50%|███████████████████████████████████████████████████████████▋                                                            | 451/907 [31:51<40:40,  5.35s/it]

training loss: 2.9051246643066406
training loss: 2.8496623039245605


training:  50%|███████████████████████████████████████████████████████████▉                                                            | 453/907 [32:01<39:45,  5.26s/it]

training loss: 2.8849148750305176
training loss: 2.8604376316070557
training loss: 2.863335371017456
training loss: 2.870535373687744
training loss: 2.9339852333068848
validation loss: 2.8970987796783447


training:  50%|████████████████████████████████████████████████████████████▌                                                           | 458/907 [32:11<32:04,  4.29s/it]

training loss: 2.907189130783081
training loss: 2.901350498199463
training loss: 2.8583459854125977
training loss: 2.8575551509857178
training loss: 2.867298126220703


training:  51%|█████████████████████████████████████████████████████████████▎                                                          | 463/907 [32:22<26:52,  3.63s/it]

training loss: 2.8653602600097656
training loss: 2.8890411853790283
training loss: 2.8817250728607178
training loss: 2.8832929134368896
training loss: 2.9084181785583496


training:  52%|█████████████████████████████████████████████████████████████▉                                                          | 468/907 [32:37<25:23,  3.47s/it]

training loss: 2.8464276790618896
training loss: 2.820460557937622
validation loss: 2.8704869747161865
training loss: 2.8242387771606445


training:  52%|██████████████████████████████████████████████████████████████▎                                                         | 471/907 [32:52<28:08,  3.87s/it]

training loss: 2.891794443130493
training loss: 2.8579165935516357
training loss: 2.91090726852417


training:  52%|██████████████████████████████████████████████████████████████▋                                                         | 474/907 [33:06<29:38,  4.11s/it]

training loss: 2.850898265838623
training loss: 2.967855930328369
training loss: 2.883723258972168


training:  53%|███████████████████████████████████████████████████████████████                                                         | 477/907 [33:20<30:37,  4.27s/it]

training loss: 2.8276114463806152
training loss: 2.8479487895965576
training loss: 2.8653786182403564


training:  53%|███████████████████████████████████████████████████████████████▌                                                        | 480/907 [33:34<31:14,  4.39s/it]

training loss: 2.8496649265289307
training loss: 2.884154796600342
validation loss: 2.856304407119751
training loss: 2.899012565612793


training:  53%|███████████████████████████████████████████████████████████████▉                                                        | 483/907 [33:48<31:54,  4.52s/it]

training loss: 2.8626809120178223
training loss: 2.9434256553649902
training loss: 2.8825550079345703


training:  54%|████████████████████████████████████████████████████████████████▎                                                       | 486/907 [34:02<31:59,  4.56s/it]

training loss: 2.8678138256073
training loss: 2.8751959800720215
training loss: 2.839568853378296


training:  54%|████████████████████████████████████████████████████████████████▋                                                       | 489/907 [34:16<32:00,  4.59s/it]

training loss: 2.851198673248291
training loss: 2.8950212001800537
training loss: 2.905219793319702


training:  54%|█████████████████████████████████████████████████████████████████                                                       | 492/907 [34:30<31:56,  4.62s/it]

training loss: 2.888690948486328
training loss: 2.8553452491760254
validation loss: 2.9158523082733154
training loss: 2.89459490776062


training:  55%|█████████████████████████████████████████████████████████████████▍                                                      | 495/907 [34:44<31:45,  4.63s/it]

training loss: 2.898175001144409
training loss: 2.8653342723846436
training loss: 2.891932487487793


training:  55%|█████████████████████████████████████████████████████████████████▉                                                      | 498/907 [34:58<31:36,  4.64s/it]

training loss: 2.8768181800842285
training loss: 2.914806842803955
training loss: 2.8716659545898438


training:  55%|██████████████████████████████████████████████████████████████████▎                                                     | 501/907 [35:12<31:26,  4.65s/it]

training loss: 2.884138822555542
training loss: 2.909100294113159
training loss: 2.879056930541992


training:  56%|██████████████████████████████████████████████████████████████████▋                                                     | 504/907 [35:26<31:15,  4.65s/it]

training loss: 2.889477014541626
training loss: 2.9087460041046143
validation loss: 2.904697895050049
training loss: 2.8504059314727783


training:  56%|███████████████████████████████████████████████████████████████████                                                     | 507/907 [35:40<31:17,  4.69s/it]

training loss: 2.8284919261932373
training loss: 2.8782923221588135
training loss: 2.878492832183838


training:  56%|███████████████████████████████████████████████████████████████████▍                                                    | 510/907 [35:54<31:00,  4.69s/it]

training loss: 2.865723133087158
training loss: 2.878939151763916
training loss: 2.8599419593811035


training:  57%|███████████████████████████████████████████████████████████████████▊                                                    | 513/907 [36:08<30:44,  4.68s/it]

training loss: 2.883469343185425
training loss: 2.920163631439209
training loss: 2.8921725749969482


training:  57%|████████████████████████████████████████████████████████████████████▎                                                   | 516/907 [36:23<30:33,  4.69s/it]

training loss: 2.989017963409424
training loss: 2.8465263843536377
validation loss: 2.9091131687164307
training loss: 2.9288289546966553


training:  57%|████████████████████████████████████████████████████████████████████▋                                                   | 519/907 [36:37<30:32,  4.72s/it]

training loss: 2.924039840698242
training loss: 2.883997678756714
training loss: 2.8816416263580322


training:  58%|█████████████████████████████████████████████████████████████████████                                                   | 522/907 [36:51<30:17,  4.72s/it]

training loss: 2.9330644607543945
training loss: 2.897125244140625
training loss: 2.88429856300354


training:  58%|█████████████████████████████████████████████████████████████████████▍                                                  | 525/907 [37:05<29:57,  4.71s/it]

training loss: 2.950247287750244
training loss: 2.8680968284606934
training loss: 2.886593818664551


training:  58%|█████████████████████████████████████████████████████████████████████▊                                                  | 528/907 [37:20<30:11,  4.78s/it]

training loss: 2.8988263607025146
training loss: 2.924835443496704
validation loss: 2.875096082687378
training loss: 2.933306932449341


training:  59%|██████████████████████████████████████████████████████████████████████▎                                                 | 531/907 [37:35<30:18,  4.84s/it]

training loss: 2.8977155685424805
training loss: 2.886639356613159
training loss: 2.861635208129883


training:  59%|██████████████████████████████████████████████████████████████████████▋                                                 | 534/907 [37:50<30:36,  4.92s/it]

training loss: 2.858506202697754
training loss: 2.890357494354248


training:  59%|██████████████████████████████████████████████████████████████████████▉                                                 | 536/907 [38:01<31:08,  5.04s/it]

training loss: 2.8865509033203125
training loss: 2.8440895080566406


training:  59%|███████████████████████████████████████████████████████████████████████▏                                                | 538/907 [38:11<31:11,  5.07s/it]

training loss: 2.8545637130737305
training loss: 2.8733034133911133


training:  60%|███████████████████████████████████████████████████████████████████████▍                                                | 540/907 [38:22<31:27,  5.14s/it]

training loss: 2.8838932514190674
training loss: 2.912247896194458
validation loss: 2.882730007171631


training:  60%|███████████████████████████████████████████████████████████████████████▋                                                | 542/907 [38:33<31:44,  5.22s/it]

training loss: 2.8803765773773193
training loss: 2.8955862522125244


training:  60%|███████████████████████████████████████████████████████████████████████▉                                                | 544/907 [38:43<31:44,  5.25s/it]

training loss: 2.8614540100097656
training loss: 2.8808650970458984


training:  60%|████████████████████████████████████████████████████████████████████████▏                                               | 546/907 [38:54<31:23,  5.22s/it]

training loss: 2.9128012657165527
training loss: 2.8585801124572754


training:  60%|████████████████████████████████████████████████████████████████████████▌                                               | 548/907 [39:04<31:18,  5.23s/it]

training loss: 2.8404412269592285
training loss: 2.9028940200805664
training loss: 2.8655428886413574


training:  61%|████████████████████████████████████████████████████████████████████████▉                                               | 551/907 [39:20<31:03,  5.23s/it]

training loss: 2.881802797317505
training loss: 2.878324031829834
training loss: 2.857567548751831


training:  61%|█████████████████████████████████████████████████████████████████████████▏                                              | 553/907 [39:32<32:52,  5.57s/it]

validation loss: 2.8782246112823486
training loss: 2.916198968887329
training loss: 2.8809468746185303


training:  61%|█████████████████████████████████████████████████████████████████████████▌                                              | 556/907 [39:48<31:39,  5.41s/it]

training loss: 2.8910417556762695
training loss: 2.854788064956665


training:  62%|█████████████████████████████████████████████████████████████████████████▊                                              | 558/907 [39:59<31:33,  5.43s/it]

training loss: 2.8761587142944336
training loss: 2.8997323513031006


training:  62%|██████████████████████████████████████████████████████████████████████████                                              | 560/907 [40:09<30:50,  5.33s/it]

training loss: 2.8692033290863037
training loss: 2.875136137008667


training:  62%|██████████████████████████████████████████████████████████████████████████▎                                             | 562/907 [40:19<30:26,  5.29s/it]

training loss: 2.909686326980591
training loss: 2.8621280193328857
training loss: 2.8521103858947754


training:  62%|██████████████████████████████████████████████████████████████████████████▎                                             | 562/907 [40:30<30:26,  5.29s/it]

training loss: 2.8563740253448486


training:  62%|██████████████████████████████████████████████████████████████████████████▊                                             | 565/907 [40:35<29:57,  5.26s/it]

validation loss: 2.8450088500976562
training loss: 2.9475560188293457
training loss: 2.8653714656829834


training:  63%|███████████████████████████████████████████████████████████████████████████▏                                            | 568/907 [40:48<28:30,  5.05s/it]

training loss: 2.9002022743225098
training loss: 2.858283042907715
training loss: 2.8952412605285645


training:  63%|███████████████████████████████████████████████████████████████████████████▌                                            | 571/907 [41:03<28:08,  5.02s/it]

training loss: 2.873485565185547
training loss: 2.8781049251556396
training loss: 2.857865810394287


training:  63%|███████████████████████████████████████████████████████████████████████████▉                                            | 574/907 [41:18<27:49,  5.01s/it]

training loss: 2.8748831748962402
training loss: 2.8629162311553955
training loss: 2.933556318283081


training:  63%|███████████████████████████████████████████████████████████████████████████▉                                            | 574/907 [41:30<27:49,  5.01s/it]

training loss: 2.8829002380371094


training:  64%|████████████████████████████████████████████████████████████████████████████▎                                           | 577/907 [41:34<28:13,  5.13s/it]

validation loss: 2.8824150562286377
training loss: 2.863551616668701


training:  64%|████████████████████████████████████████████████████████████████████████████▌                                           | 579/907 [41:45<28:08,  5.15s/it]

training loss: 2.9018688201904297
training loss: 2.866642475128174


training:  64%|████████████████████████████████████████████████████████████████████████████▊                                           | 581/907 [41:55<28:08,  5.18s/it]

training loss: 2.8688011169433594
training loss: 2.8991475105285645


training:  64%|█████████████████████████████████████████████████████████████████████████████▏                                          | 583/907 [42:06<28:31,  5.28s/it]

training loss: 2.899717092514038
training loss: 2.8561792373657227


training:  64%|█████████████████████████████████████████████████████████████████████████████▍                                          | 585/907 [42:17<28:44,  5.35s/it]

training loss: 2.8756589889526367
training loss: 2.8785321712493896


training:  65%|█████████████████████████████████████████████████████████████████████████████▋                                          | 587/907 [42:28<28:42,  5.38s/it]

training loss: 2.8491289615631104
training loss: 2.8472299575805664
training loss: 2.8826990127563477


training:  65%|█████████████████████████████████████████████████████████████████████████████▉                                          | 589/907 [42:39<28:22,  5.35s/it]

validation loss: 2.9059717655181885
training loss: 2.8722214698791504
training loss: 2.945492744445801


training:  65%|██████████████████████████████████████████████████████████████████████████████▎                                         | 592/907 [42:54<27:25,  5.22s/it]

training loss: 2.836367607116699
training loss: 2.898702383041382
training loss: 2.832306146621704


training:  66%|██████████████████████████████████████████████████████████████████████████████▋                                         | 595/907 [43:09<27:07,  5.22s/it]

training loss: 2.8617193698883057
training loss: 2.881175994873047
training loss: 2.8764777183532715


training:  66%|███████████████████████████████████████████████████████████████████████████████                                         | 598/907 [43:24<26:13,  5.09s/it]

training loss: 2.8732826709747314
training loss: 2.8884942531585693
training loss: 2.8489930629730225
training loss: 2.895534038543701


training:  66%|███████████████████████████████████████████████████████████████████████████████▌                                        | 601/907 [43:38<25:45,  5.05s/it]

validation loss: 2.852943181991577
training loss: 2.9105753898620605
training loss: 2.9030685424804688


training:  67%|███████████████████████████████████████████████████████████████████████████████▉                                        | 604/907 [43:54<25:26,  5.04s/it]

training loss: 2.8861753940582275
training loss: 2.861910104751587
training loss: 2.893101215362549
training loss: 2.9358294010162354
training loss: 2.8556320667266846


training:  67%|████████████████████████████████████████████████████████████████████████████████▌                                       | 609/907 [44:05<20:47,  4.19s/it]

training loss: 2.825291872024536
training loss: 2.8653080463409424
training loss: 2.89805006980896
training loss: 2.9016146659851074
training loss: 2.904606819152832
validation loss: 2.8686347007751465
training loss: 2.9243321418762207
training loss: 2.887437582015991
training loss: 2.8845667839050293


training:  68%|█████████████████████████████████████████████████████████████████████████████████▋                                      | 617/907 [44:15<16:01,  3.31s/it]

training loss: 2.8767611980438232
training loss: 2.8254165649414062
training loss: 2.8327994346618652
training loss: 2.8876399993896484
training loss: 2.903024196624756
training loss: 2.8873090744018555
training loss: 2.8459231853485107
training loss: 2.885063409805298
training loss: 2.903170585632324


training:  69%|██████████████████████████████████████████████████████████████████████████████████▋                                     | 625/907 [44:29<13:27,  2.86s/it]

validation loss: 2.8696067333221436
training loss: 2.8658456802368164
training loss: 2.8569600582122803


training:  69%|███████████████████████████████████████████████████████████████████████████████████                                     | 628/907 [44:43<15:33,  3.35s/it]

training loss: 2.9149413108825684
training loss: 2.861591339111328
training loss: 2.8543665409088135
training loss: 2.931504249572754
training loss: 2.878387928009033


training:  70%|███████████████████████████████████████████████████████████████████████████████████▋                                    | 633/907 [44:55<14:10,  3.10s/it]

training loss: 2.874483823776245
training loss: 2.8754048347473145
training loss: 2.8432528972625732
training loss: 2.8760969638824463


training:  70%|███████████████████████████████████████████████████████████████████████████████████▋                                    | 633/907 [45:10<14:10,  3.10s/it]

training loss: 2.8897666931152344


training:  70%|████████████████████████████████████████████████████████████████████████████████████▎                                   | 637/907 [45:14<16:08,  3.59s/it]

validation loss: 2.922076463699341
training loss: 2.8547306060791016
training loss: 2.9104342460632324
training loss: 2.9101719856262207


training:  71%|████████████████████████████████████████████████████████████████████████████████████▊                                   | 641/907 [45:30<16:13,  3.66s/it]

training loss: 2.8611998558044434
training loss: 2.8528995513916016
training loss: 2.889446496963501


training:  71%|█████████████████████████████████████████████████████████████████████████████████████▏                                  | 644/907 [45:42<16:33,  3.78s/it]

training loss: 2.9103586673736572
training loss: 2.831835985183716
training loss: 2.916024684906006


training:  71%|█████████████████████████████████████████████████████████████████████████████████████▌                                  | 647/907 [45:59<18:45,  4.33s/it]

training loss: 2.894469976425171
training loss: 2.8615686893463135
training loss: 2.862684726715088
validation loss: 2.844558000564575
training loss: 2.8852951526641846
training loss: 2.8853678703308105


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▎                                 | 652/907 [46:09<15:32,  3.66s/it]

training loss: 2.8729636669158936
training loss: 2.872831106185913
training loss: 2.872849464416504
training loss: 2.8453409671783447
training loss: 2.8304953575134277
training loss: 2.9215247631073
training loss: 2.842381000518799
training loss: 2.8902227878570557


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▎                                | 660/907 [46:20<12:10,  2.96s/it]

training loss: 2.8951456546783447
training loss: 2.8901607990264893
validation loss: 2.903099536895752
training loss: 2.8919949531555176
training loss: 2.885443925857544
training loss: 2.856348752975464


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▉                                | 665/907 [46:31<11:00,  2.73s/it]

training loss: 2.888335943222046
training loss: 2.8476462364196777
training loss: 2.8612277507781982
training loss: 2.8654136657714844
training loss: 2.8387372493743896


training:  74%|████████████████████████████████████████████████████████████████████████████████████████▋                               | 670/907 [46:45<10:54,  2.76s/it]

training loss: 2.9239578247070312
training loss: 2.854797601699829
training loss: 2.881565570831299
training loss: 2.917053461074829
validation loss: 2.8849942684173584


training:  74%|█████████████████████████████████████████████████████████████████████████████████████████▏                              | 674/907 [47:03<12:40,  3.27s/it]

training loss: 2.848775863647461
training loss: 2.9022581577301025
training loss: 2.867035150527954


training:  75%|█████████████████████████████████████████████████████████████████████████████████████████▌                              | 677/907 [47:15<13:43,  3.58s/it]

training loss: 2.930131673812866
training loss: 2.87156343460083
training loss: 2.8632116317749023


training:  75%|█████████████████████████████████████████████████████████████████████████████████████████▉                              | 680/907 [47:29<14:33,  3.85s/it]

training loss: 2.8835132122039795
training loss: 2.8506295680999756
training loss: 2.917248249053955


training:  75%|██████████████████████████████████████████████████████████████████████████████████████████▎                             | 683/907 [47:42<14:58,  4.01s/it]

training loss: 2.913687229156494
training loss: 2.8948140144348145
training loss: 2.841388463973999
validation loss: 2.891148567199707


training:  76%|██████████████████████████████████████████████████████████████████████████████████████████▊                             | 686/907 [47:53<14:24,  3.91s/it]

training loss: 2.8636860847473145
training loss: 2.8132941722869873
training loss: 2.8961479663848877


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████▏                            | 689/907 [48:07<14:51,  4.09s/it]

training loss: 2.902451515197754
training loss: 2.863882064819336
training loss: 2.8860890865325928


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████▌                            | 692/907 [48:19<14:36,  4.08s/it]

training loss: 2.862502336502075
training loss: 2.8772263526916504
training loss: 2.8943915367126465


training:  77%|███████████████████████████████████████████████████████████████████████████████████████████▉                            | 695/907 [48:31<14:29,  4.10s/it]

training loss: 2.886082649230957
training loss: 2.9256274700164795
training loss: 2.885119676589966
validation loss: 2.915086269378662


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▎                           | 698/907 [48:44<14:19,  4.11s/it]

training loss: 2.8638243675231934
training loss: 2.899761199951172
training loss: 2.8528823852539062


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▋                           | 701/907 [48:56<14:06,  4.11s/it]

training loss: 2.8990488052368164
training loss: 2.8663432598114014
training loss: 2.8647804260253906
training loss: 2.887383460998535


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▎                          | 705/907 [49:09<12:54,  3.83s/it]

training loss: 2.8956410884857178
training loss: 2.8904223442077637
training loss: 2.9059715270996094


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▋                          | 708/907 [49:21<12:59,  3.91s/it]

training loss: 2.8630385398864746
training loss: 2.8785247802734375
validation loss: 2.873507261276245
training loss: 2.8792505264282227


training:  78%|██████████████████████████████████████████████████████████████████████████████████████████████                          | 711/907 [49:34<13:03,  4.00s/it]

training loss: 2.8727288246154785
training loss: 2.8988184928894043
training loss: 2.8536720275878906


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▍                         | 714/907 [49:45<12:39,  3.94s/it]

training loss: 2.8868534564971924
training loss: 2.899534225463867
training loss: 2.8933863639831543


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▊                         | 717/907 [49:58<12:44,  4.03s/it]

training loss: 2.889679431915283
training loss: 2.9238624572753906
training loss: 2.874269723892212


training:  79%|███████████████████████████████████████████████████████████████████████████████████████████████▎                        | 720/907 [50:08<12:08,  3.89s/it]

training loss: 2.9182252883911133
training loss: 2.8834619522094727
validation loss: 2.8839292526245117
training loss: 2.8571386337280273


training:  80%|███████████████████████████████████████████████████████████████████████████████████████████████▋                        | 723/907 [50:22<12:37,  4.12s/it]

training loss: 2.874856472015381
training loss: 2.8871636390686035
training loss: 2.8759806156158447


training:  80%|████████████████████████████████████████████████████████████████████████████████████████████████                        | 726/907 [50:35<12:38,  4.19s/it]

training loss: 2.8794589042663574
training loss: 2.864220142364502
training loss: 2.879124164581299


training:  80%|████████████████████████████████████████████████████████████████████████████████████████████████▍                       | 729/907 [50:48<12:19,  4.15s/it]

training loss: 2.8573856353759766
training loss: 2.9178225994110107
training loss: 2.8807809352874756


training:  81%|████████████████████████████████████████████████████████████████████████████████████████████████▊                       | 732/907 [50:58<11:29,  3.94s/it]

training loss: 2.888807773590088
training loss: 2.8736472129821777
validation loss: 2.8231000900268555
training loss: 2.873626708984375


training:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████▏                      | 735/907 [51:10<11:27,  4.00s/it]

training loss: 2.8648784160614014
training loss: 2.8899364471435547
training loss: 2.7927587032318115


training:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████▋                      | 738/907 [51:23<11:20,  4.02s/it]

training loss: 2.8381595611572266
training loss: 2.9091882705688477
training loss: 2.888493061065674


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████                      | 741/907 [51:35<11:11,  4.04s/it]

training loss: 2.818963050842285
training loss: 2.8699538707733154
training loss: 2.8465447425842285


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▍                     | 744/907 [51:48<11:13,  4.13s/it]

training loss: 2.8399853706359863
training loss: 2.8695602416992188
validation loss: 2.897831916809082
training loss: 2.8745057582855225


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▊                     | 747/907 [51:58<10:24,  3.90s/it]

training loss: 2.846684455871582
training loss: 2.8945565223693848
training loss: 2.8524014949798584


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▏                    | 750/907 [52:10<10:16,  3.93s/it]

training loss: 2.91778826713562
training loss: 2.901686191558838
training loss: 2.898625373840332


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▋                    | 753/907 [52:22<10:03,  3.92s/it]

training loss: 2.877157688140869
training loss: 2.8535690307617188
training loss: 2.866659641265869


training:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████                    | 756/907 [52:35<10:09,  4.04s/it]

training loss: 2.866375684738159
training loss: 2.8791611194610596
validation loss: 2.8740150928497314
training loss: 2.8970067501068115


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▍                   | 759/907 [52:47<10:07,  4.10s/it]

training loss: 2.9056646823883057
training loss: 2.890883684158325
training loss: 2.879903793334961


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▊                   | 762/907 [53:01<10:10,  4.21s/it]

training loss: 2.859276294708252
training loss: 2.8662967681884766
training loss: 2.882493734359741


training:  84%|█████████████████████████████████████████████████████████████████████████████████████████████████████▏                  | 765/907 [53:12<09:37,  4.06s/it]

training loss: 2.8694772720336914
training loss: 2.8468053340911865
training loss: 2.886955738067627


training:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████▌                  | 768/907 [53:23<09:11,  3.97s/it]

training loss: 2.8612687587738037
training loss: 2.86041522026062
validation loss: 2.9617958068847656
training loss: 2.852396249771118


training:  85%|██████████████████████████████████████████████████████████████████████████████████████████████████████                  | 771/907 [53:35<08:57,  3.96s/it]

training loss: 2.866210460662842
training loss: 2.8842077255249023
training loss: 2.885239839553833


training:  85%|██████████████████████████████████████████████████████████████████████████████████████████████████████▍                 | 774/907 [53:48<09:00,  4.07s/it]

training loss: 2.9093475341796875
training loss: 2.836643695831299
training loss: 2.9157967567443848


training:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                 | 777/907 [54:00<08:47,  4.06s/it]

training loss: 2.8857502937316895
training loss: 2.867783308029175
training loss: 2.9482011795043945


training:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏                | 780/907 [54:13<08:47,  4.16s/it]

training loss: 2.8410451412200928
training loss: 2.8767521381378174
validation loss: 2.816973924636841
training loss: 2.908740520477295
training loss: 2.8649423122406006
training loss: 2.9050188064575195
training loss: 2.8679940700531006
training loss: 2.867152452468872


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████                | 787/907 [54:23<06:40,  3.34s/it]

training loss: 2.8576724529266357
training loss: 2.8904151916503906
training loss: 2.9151086807250977
training loss: 2.8560478687286377


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋               | 791/907 [54:40<06:56,  3.59s/it]

training loss: 2.9088873863220215
training loss: 2.932002544403076
training loss: 2.910994291305542
validation loss: 2.880168914794922
training loss: 2.850512981414795
training loss: 2.900487184524536
training loss: 2.8569018840789795
training loss: 2.966780424118042


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▌              | 798/907 [54:52<05:28,  3.02s/it]

training loss: 2.869455337524414
training loss: 2.8726751804351807
training loss: 2.8558573722839355
training loss: 2.908280849456787
training loss: 2.8510451316833496
training loss: 2.867459774017334


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▎             | 804/907 [55:13<05:26,  3.17s/it]

training loss: 2.9217379093170166
training loss: 2.910048723220825
validation loss: 2.8550798892974854
training loss: 2.9182727336883545


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▊             | 807/907 [55:28<06:13,  3.74s/it]

training loss: 3.131049633026123
training loss: 2.8927831649780273
training loss: 2.8902721405029297


training:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏            | 810/907 [55:39<06:03,  3.75s/it]

training loss: 2.8821539878845215
training loss: 2.938140869140625
training loss: 2.8751323223114014


training:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌            | 813/907 [55:51<06:00,  3.83s/it]

training loss: 2.8868980407714844
training loss: 2.832517147064209
training loss: 2.916015863418579


training:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▉            | 816/907 [56:09<06:47,  4.47s/it]

training loss: 2.8654305934906006
training loss: 2.907696008682251
validation loss: 2.9240939617156982


training:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▏           | 818/907 [56:22<07:33,  5.10s/it]

training loss: 2.8395278453826904
training loss: 2.869805335998535


training:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▍           | 820/907 [56:35<07:54,  5.45s/it]

training loss: 2.895773410797119
training loss: 2.8837380409240723
training loss: 2.8876049518585205


training:  91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▉           | 823/907 [56:48<07:06,  5.08s/it]

training loss: 2.897921323776245
training loss: 2.8878531455993652
training loss: 2.893878936767578


training:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▎          | 826/907 [57:03<06:48,  5.05s/it]

training loss: 2.894680976867676
training loss: 2.8995370864868164
training loss: 2.8568601608276367
training loss: 2.915187358856201
validation loss: 2.856459379196167


training:  92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▊          | 830/907 [57:16<05:48,  4.53s/it]

training loss: 2.8834948539733887
training loss: 2.8573806285858154
training loss: 2.9284234046936035
training loss: 2.8968465328216553


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▎         | 834/907 [57:29<05:03,  4.15s/it]

training loss: 2.9022269248962402
training loss: 2.872321367263794
training loss: 2.89190673828125
training loss: 2.8481597900390625
training loss: 2.895090103149414
training loss: 2.8902058601379395


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▏        | 840/907 [57:43<04:01,  3.61s/it]

training loss: 2.874490737915039
training loss: 2.885373592376709
validation loss: 2.892659902572632
training loss: 2.871288299560547
training loss: 2.9183082580566406


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▋        | 844/907 [58:04<04:19,  4.12s/it]

training loss: 2.9458086490631104
training loss: 2.9070372581481934


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▉        | 846/907 [58:15<04:33,  4.49s/it]

training loss: 2.840717315673828
training loss: 2.8661060333251953
training loss: 2.9366252422332764


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎       | 849/907 [58:30<04:27,  4.61s/it]

training loss: 2.9164626598358154
training loss: 2.852806329727173


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌       | 851/907 [58:42<04:44,  5.09s/it]

training loss: 2.8484036922454834
training loss: 2.859281063079834
training loss: 2.9030377864837646


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊       | 853/907 [58:55<04:59,  5.55s/it]

validation loss: 2.848007917404175
training loss: 2.918527603149414


training:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████       | 855/907 [59:07<04:55,  5.68s/it]

training loss: 2.846208095550537
training loss: 2.9133262634277344


training:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍      | 857/907 [59:18<04:41,  5.63s/it]

training loss: 2.8646461963653564
training loss: 2.879892349243164


training:  95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋      | 859/907 [59:28<04:22,  5.47s/it]

training loss: 2.883075714111328
training loss: 2.8422701358795166


training:  95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉      | 861/907 [59:39<04:10,  5.45s/it]

training loss: 2.864109992980957
training loss: 2.832338809967041


training:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏     | 863/907 [59:49<03:54,  5.33s/it]

training loss: 2.9221267700195312
training loss: 2.8727145195007324


training:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎     | 863/907 [1:00:00<03:54,  5.33s/it]

training loss: 2.8816044330596924


training:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌     | 865/907 [1:00:00<03:46,  5.40s/it]

validation loss: 2.866452217102051
training loss: 2.8267321586608887


training:  96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊     | 867/907 [1:00:11<03:32,  5.32s/it]

training loss: 2.8982043266296387
training loss: 2.8404319286346436
training loss: 2.889348268508911


training:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏    | 870/907 [1:00:23<03:02,  4.95s/it]

training loss: 2.892014265060425
training loss: 2.88181209564209
training loss: 2.854238986968994


training:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌    | 873/907 [1:00:39<02:53,  5.09s/it]

training loss: 2.894747495651245
training loss: 2.8621835708618164
training loss: 2.839353084564209


training:  97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉    | 876/907 [1:00:55<02:37,  5.09s/it]

training loss: 2.862837553024292
training loss: 2.8791894912719727
validation loss: 2.8923089504241943


training:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 878/907 [1:01:05<02:29,  5.14s/it]

training loss: 2.844666004180908
training loss: 2.876126289367676
training loss: 2.8688197135925293


training:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌   | 881/907 [1:01:19<02:09,  5.00s/it]

training loss: 2.8774466514587402
training loss: 2.8217644691467285
training loss: 2.8220744132995605


training:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████   | 884/907 [1:01:33<01:52,  4.89s/it]

training loss: 2.9256515502929688
training loss: 2.8559155464172363
training loss: 2.8618245124816895


training:  98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍  | 887/907 [1:01:47<01:36,  4.80s/it]

training loss: 2.9235470294952393
training loss: 2.890350341796875
training loss: 2.8616902828216553
validation loss: 2.8252310752868652


training:  98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊  | 890/907 [1:02:01<01:21,  4.81s/it]

training loss: 2.9356956481933594
training loss: 2.8553481101989746
training loss: 2.871290683746338


training:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 893/907 [1:02:16<01:08,  4.86s/it]

training loss: 2.8847479820251465
training loss: 2.891231060028076
training loss: 2.8638556003570557


training:  99%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 896/907 [1:02:30<00:53,  4.84s/it]

training loss: 2.8583531379699707
training loss: 2.907407760620117
training loss: 2.904188394546509


training:  99%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 899/907 [1:02:41<00:35,  4.46s/it]

training loss: 2.892298936843872
training loss: 2.880833864212036
training loss: 2.8753037452697754
validation loss: 2.865846872329712


training:  99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎| 902/907 [1:02:57<00:23,  4.67s/it]

training loss: 2.8668196201324463
training loss: 2.8645644187927246
training loss: 2.8576502799987793


training: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 905/907 [1:03:11<00:09,  4.68s/it]

training loss: 2.8603320121765137
training loss: 2.8578603267669678


training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 907/907 [1:03:19<00:00,  4.19s/it]
training:   0%|                                                                                                                                  | 0/907 [00:00<?, ?it/s]

training loss: 2.8815369606018066
training loss: 2.868820905685425
validation loss: 2.910703182220459
training loss: 2.903090715408325
training loss: 2.8824188709259033
training loss: 2.9194231033325195
training loss: 2.9053828716278076
training loss: 2.8948047161102295


training:   1%|▉                                                                                                                         | 7/907 [00:10<23:08,  1.54s/it]

training loss: 2.891319990158081
training loss: 2.879565954208374
training loss: 2.912682294845581
training loss: 2.8750178813934326
training loss: 2.932180643081665
training loss: 2.8386752605438232
training loss: 2.835401773452759
validation loss: 2.885460615158081


training:   2%|█▊                                                                                                                       | 14/907 [00:22<23:48,  1.60s/it]

training loss: 2.9342615604400635
training loss: 2.9031858444213867
training loss: 2.8524274826049805
training loss: 2.903606414794922


training:   2%|██▍                                                                                                                      | 18/907 [00:42<38:06,  2.57s/it]

training loss: 2.843022108078003
training loss: 2.852750778198242
training loss: 2.8920300006866455


training:   2%|██▊                                                                                                                      | 21/907 [00:58<51:01,  3.46s/it]

training loss: 2.878241777420044
training loss: 2.925377607345581


training:   3%|███                                                                                                                      | 23/907 [01:09<59:53,  4.06s/it]

training loss: 2.879965305328369
training loss: 2.8433873653411865


training:   3%|███                                                                                                                      | 23/907 [01:20<59:53,  4.06s/it]

training loss: 2.886674404144287


training:   3%|███▎                                                                                                                   | 25/907 [01:21<1:07:04,  4.56s/it]

validation loss: 2.8997089862823486
training loss: 2.900817394256592


training:   3%|███▌                                                                                                                   | 27/907 [01:32<1:10:42,  4.82s/it]

training loss: 2.8899505138397217
training loss: 2.870828628540039


training:   3%|███▊                                                                                                                   | 29/907 [01:43<1:13:22,  5.01s/it]

training loss: 2.9003026485443115
training loss: 2.87022066116333


training:   3%|████                                                                                                                   | 31/907 [01:54<1:15:16,  5.16s/it]

training loss: 2.9200334548950195
training loss: 2.902435302734375


training:   4%|████▎                                                                                                                  | 33/907 [02:04<1:15:53,  5.21s/it]

training loss: 2.8806278705596924
training loss: 2.8884077072143555


training:   4%|████▌                                                                                                                  | 35/907 [02:15<1:15:45,  5.21s/it]

training loss: 2.8654325008392334
training loss: 2.8800108432769775
training loss: 2.897653579711914


training:   4%|████▊                                                                                                                  | 37/907 [02:25<1:15:43,  5.22s/it]

validation loss: 2.8574578762054443
training loss: 2.8965628147125244


training:   4%|█████                                                                                                                  | 39/907 [02:36<1:15:50,  5.24s/it]

training loss: 2.9113173484802246
training loss: 2.909869909286499


training:   5%|█████▍                                                                                                                 | 41/907 [02:46<1:15:54,  5.26s/it]

training loss: 2.885634183883667
training loss: 2.835496664047241
training loss: 2.9591705799102783


training:   5%|█████▊                                                                                                                 | 44/907 [03:01<1:13:31,  5.11s/it]

training loss: 2.928910732269287
training loss: 2.886579990386963
training loss: 2.9109437465667725


training:   5%|██████▏                                                                                                                | 47/907 [03:17<1:15:11,  5.25s/it]

training loss: 2.8677852153778076
training loss: 2.9000658988952637
training loss: 2.8848397731781006


training:   5%|██████▍                                                                                                                | 49/907 [03:30<1:18:50,  5.51s/it]

validation loss: 2.8366000652313232
training loss: 2.8834447860717773


training:   6%|██████▋                                                                                                                | 51/907 [03:40<1:16:52,  5.39s/it]

training loss: 2.873677968978882
training loss: 2.885634183883667


training:   6%|██████▉                                                                                                                | 53/907 [03:50<1:15:27,  5.30s/it]

training loss: 2.872572898864746
training loss: 2.8517653942108154


training:   6%|███████▏                                                                                                               | 55/907 [04:02<1:18:00,  5.49s/it]

training loss: 2.866286039352417
training loss: 2.8367462158203125


training:   6%|███████▍                                                                                                               | 57/907 [04:13<1:18:29,  5.54s/it]

training loss: 2.875331401824951
training loss: 2.9247841835021973


training:   7%|███████▋                                                                                                               | 59/907 [04:24<1:18:27,  5.55s/it]

training loss: 2.8578097820281982
training loss: 2.834632158279419
training loss: 2.8636629581451416
validation loss: 2.8821144104003906
training loss: 2.950127363204956


training:   7%|████████▎                                                                                                              | 63/907 [04:35<1:05:50,  4.68s/it]

training loss: 2.835463762283325
training loss: 2.8358960151672363
training loss: 2.8704750537872314
training loss: 2.8471338748931885
training loss: 2.91045880317688
training loss: 2.894404649734497
training loss: 2.8517966270446777
training loss: 2.864985227584839


training:   8%|█████████▍                                                                                                               | 71/907 [04:45<51:00,  3.66s/it]

training loss: 2.8999576568603516
training loss: 2.8737552165985107
training loss: 2.933304786682129
validation loss: 2.8782269954681396
training loss: 2.8508808612823486
training loss: 2.875194787979126
training loss: 2.8734428882598877
training loss: 2.8993163108825684
training loss: 2.8664710521698


training:   9%|██████████▌                                                                                                              | 79/907 [05:01<43:38,  3.16s/it]

training loss: 2.8523058891296387
training loss: 2.8971993923187256
training loss: 2.844594717025757
training loss: 2.8970789909362793
training loss: 2.8627254962921143
training loss: 2.8333287239074707


training:   9%|██████████▌                                                                                                              | 79/907 [05:20<43:38,  3.16s/it]

training loss: 2.8799381256103516


training:   9%|███████████▎                                                                                                             | 85/907 [05:25<46:25,  3.39s/it]

validation loss: 2.8566815853118896
training loss: 2.865375280380249
training loss: 2.862090826034546


training:  10%|███████████▋                                                                                                             | 88/907 [05:40<52:52,  3.87s/it]

training loss: 2.900110960006714
training loss: 2.9032044410705566


training:  10%|████████████                                                                                                             | 90/907 [05:51<59:28,  4.37s/it]

training loss: 2.895458221435547
training loss: 2.854670763015747


training:  10%|████████████                                                                                                           | 92/907 [06:01<1:02:37,  4.61s/it]

training loss: 2.854717493057251
training loss: 2.830050230026245
training loss: 2.874143362045288


training:  10%|████████████▍                                                                                                          | 95/907 [06:15<1:03:16,  4.68s/it]

training loss: 2.8809759616851807
training loss: 2.8674962520599365
training loss: 2.8277719020843506
validation loss: 2.864978075027466


training:  11%|████████████▊                                                                                                          | 98/907 [06:31<1:05:20,  4.85s/it]

training loss: 2.890329360961914
training loss: 2.8890881538391113


training:  11%|█████████████                                                                                                         | 100/907 [06:42<1:06:21,  4.93s/it]

training loss: 2.8476574420928955
training loss: 2.9298782348632812


training:  11%|█████████████▎                                                                                                        | 102/907 [06:52<1:06:40,  4.97s/it]

training loss: 2.8874847888946533
training loss: 2.8636910915374756


training:  11%|█████████████▌                                                                                                        | 104/907 [07:03<1:08:50,  5.14s/it]

training loss: 2.8192553520202637
training loss: 2.855599880218506
training loss: 2.92486310005188


training:  12%|█████████████▉                                                                                                        | 107/907 [07:17<1:07:34,  5.07s/it]

training loss: 2.866274118423462
training loss: 2.856764316558838
training loss: 2.880486488342285
validation loss: 2.808201551437378


training:  12%|██████████████▎                                                                                                       | 110/907 [07:34<1:08:59,  5.19s/it]

training loss: 2.889227867126465
training loss: 2.8522727489471436
training loss: 2.842407703399658


training:  12%|██████████████▋                                                                                                       | 113/907 [07:48<1:06:55,  5.06s/it]

training loss: 2.8765952587127686
training loss: 2.8673255443573
training loss: 2.8471696376800537


training:  13%|███████████████                                                                                                       | 116/907 [08:04<1:07:48,  5.14s/it]

training loss: 2.9087400436401367
training loss: 2.89770245552063


training:  13%|███████████████▎                                                                                                      | 118/907 [08:14<1:07:46,  5.15s/it]

training loss: 2.852139711380005
training loss: 2.865443706512451


training:  13%|███████████████▌                                                                                                      | 120/907 [08:25<1:08:07,  5.19s/it]

training loss: 2.8757517337799072
training loss: 2.8642220497131348
validation loss: 2.866628646850586


training:  13%|███████████████▊                                                                                                      | 122/907 [08:36<1:09:39,  5.32s/it]

training loss: 2.8906989097595215
training loss: 2.9346489906311035


training:  14%|████████████████▏                                                                                                     | 124/907 [08:47<1:10:28,  5.40s/it]

training loss: 2.9014673233032227
training loss: 2.884484052658081
training loss: 2.8341221809387207


training:  14%|████████████████▌                                                                                                     | 127/907 [09:02<1:08:01,  5.23s/it]

training loss: 2.863945484161377
training loss: 2.8430705070495605
training loss: 2.8767426013946533


training:  14%|████████████████▉                                                                                                     | 130/907 [09:18<1:07:34,  5.22s/it]

training loss: 2.9156978130340576
training loss: 2.870755195617676
training loss: 2.8941445350646973


training:  14%|████████████████▉                                                                                                     | 130/907 [09:30<1:07:34,  5.22s/it]

training loss: 2.907104015350342


training:  15%|█████████████████▎                                                                                                    | 133/907 [09:33<1:07:38,  5.24s/it]

validation loss: 2.88327693939209
training loss: 2.868713140487671
training loss: 2.8894383907318115


training:  15%|█████████████████▋                                                                                                    | 136/907 [09:48<1:05:20,  5.08s/it]

training loss: 2.892878293991089
training loss: 2.8765852451324463
training loss: 2.8866500854492188


training:  15%|██████████████████                                                                                                    | 139/907 [10:02<1:03:44,  4.98s/it]

training loss: 2.898771047592163
training loss: 2.915546417236328
training loss: 2.9006845951080322
training loss: 2.855393171310425


training:  16%|██████████████████▉                                                                                                     | 143/907 [10:13<54:55,  4.31s/it]

training loss: 2.8597629070281982
training loss: 2.8685994148254395
training loss: 2.8534655570983887
validation loss: 2.868523597717285
training loss: 2.89875864982605
training loss: 2.8312392234802246
training loss: 2.9056432247161865


training:  16%|███████████████████▋                                                                                                    | 149/907 [10:26<46:34,  3.69s/it]

training loss: 2.8482794761657715
training loss: 2.886601686477661
training loss: 2.869157552719116


training:  17%|████████████████████                                                                                                    | 152/907 [10:41<51:21,  4.08s/it]

training loss: 2.816657781600952
training loss: 2.8971681594848633


training:  17%|████████████████████▎                                                                                                   | 154/907 [10:52<55:44,  4.44s/it]

training loss: 2.8602845668792725
training loss: 2.908949375152588


training:  17%|████████████████████▋                                                                                                   | 156/907 [11:03<59:10,  4.73s/it]

training loss: 2.846182107925415
training loss: 2.952475070953369
validation loss: 2.860400438308716


training:  17%|████████████████████▌                                                                                                 | 158/907 [11:13<1:00:42,  4.86s/it]

training loss: 2.8701703548431396
training loss: 2.8363449573516846
training loss: 2.8909568786621094


training:  18%|████████████████████▉                                                                                                 | 161/907 [11:28<1:01:00,  4.91s/it]

training loss: 2.9113097190856934
training loss: 2.902440309524536


training:  18%|█████████████████████▏                                                                                                | 163/907 [11:38<1:01:22,  4.95s/it]

training loss: 2.854436159133911
training loss: 2.921633720397949
training loss: 2.8670482635498047


training:  18%|█████████████████████▌                                                                                                | 166/907 [11:53<1:00:53,  4.93s/it]

training loss: 2.928283452987671
training loss: 2.8794100284576416
training loss: 2.8746211528778076
training loss: 2.874447822570801


training:  19%|█████████████████████▉                                                                                                | 169/907 [12:07<1:00:28,  4.92s/it]

validation loss: 2.8823986053466797
training loss: 2.9300079345703125
training loss: 2.8415212631225586


training:  19%|██████████████████████▍                                                                                               | 172/907 [12:22<1:00:06,  4.91s/it]

training loss: 2.918144464492798
training loss: 2.8937039375305176
training loss: 2.832075357437134
training loss: 2.9156975746154785
training loss: 2.8866944313049316


training:  20%|███████████████████████▍                                                                                                | 177/907 [12:36<52:19,  4.30s/it]

training loss: 2.8969030380249023
training loss: 2.845634937286377
training loss: 2.9086050987243652


training:  20%|███████████████████████▊                                                                                                | 180/907 [12:51<53:51,  4.44s/it]

training loss: 2.882566213607788
training loss: 2.9275856018066406
validation loss: 2.8489325046539307
training loss: 2.903024435043335
training loss: 2.8803789615631104


training:  20%|████████████████████████▎                                                                                               | 184/907 [13:02<47:47,  3.97s/it]

training loss: 2.838437795639038
training loss: 2.8344719409942627
training loss: 2.868859052658081
training loss: 2.890730619430542


training:  21%|████████████████████████▊                                                                                               | 188/907 [13:21<50:33,  4.22s/it]

training loss: 2.8509724140167236
training loss: 2.8270719051361084
training loss: 2.884260654449463


training:  21%|█████████████████████████▎                                                                                              | 191/907 [13:36<52:15,  4.38s/it]

training loss: 2.879885673522949
training loss: 2.8663408756256104
training loss: 2.8638968467712402
validation loss: 2.8523690700531006


training:  21%|█████████████████████████▋                                                                                              | 194/907 [13:51<54:45,  4.61s/it]

training loss: 2.924309492111206
training loss: 2.874070644378662
training loss: 2.933112859725952


training:  22%|██████████████████████████                                                                                              | 197/907 [14:06<55:38,  4.70s/it]

training loss: 2.8799736499786377
training loss: 2.868009567260742
training loss: 2.913086414337158


training:  22%|██████████████████████████▍                                                                                             | 200/907 [14:21<56:25,  4.79s/it]

training loss: 2.9306187629699707
training loss: 2.8832013607025146
training loss: 2.903432846069336


training:  22%|██████████████████████████▊                                                                                             | 203/907 [14:36<57:20,  4.89s/it]

training loss: 2.867419958114624
training loss: 2.954939842224121
training loss: 2.891828775405884


training:  23%|███████████████████████████                                                                                             | 205/907 [14:47<58:53,  5.03s/it]

validation loss: 2.86893892288208
training loss: 2.8764238357543945
training loss: 2.882599353790283


training:  23%|███████████████████████████▌                                                                                            | 208/907 [15:02<58:26,  5.02s/it]

training loss: 2.8690576553344727
training loss: 2.850309371948242
training loss: 2.9185755252838135


training:  23%|███████████████████████████▉                                                                                            | 211/907 [15:17<57:51,  4.99s/it]

training loss: 2.885584592819214
training loss: 2.9372830390930176
training loss: 2.8953444957733154


training:  24%|████████████████████████████▎                                                                                           | 214/907 [15:32<57:50,  5.01s/it]

training loss: 2.8845114707946777
training loss: 2.8721911907196045
training loss: 2.867340326309204
training loss: 2.8958632946014404


training:  24%|████████████████████████████▋                                                                                           | 217/907 [15:47<58:19,  5.07s/it]

validation loss: 2.888152837753296
training loss: 2.865961790084839
training loss: 2.8805623054504395


training:  24%|█████████████████████████████                                                                                           | 220/907 [16:02<57:38,  5.03s/it]

training loss: 2.8511276245117188
training loss: 2.8925650119781494
training loss: 2.8883209228515625


training:  25%|█████████████████████████████▌                                                                                          | 223/907 [16:17<56:41,  4.97s/it]

training loss: 2.872307777404785
training loss: 2.882187843322754
training loss: 2.8949427604675293


training:  25%|█████████████████████████████▉                                                                                          | 226/907 [16:28<52:11,  4.60s/it]

training loss: 2.865187168121338
training loss: 2.8716983795166016
training loss: 2.898632049560547


training:  25%|█████████████████████████████▉                                                                                          | 226/907 [16:40<52:11,  4.60s/it]

training loss: 2.8507206439971924


training:  25%|██████████████████████████████▎                                                                                         | 229/907 [16:44<54:59,  4.87s/it]

validation loss: 2.8502211570739746
training loss: 2.8738560676574707
training loss: 2.8821892738342285


training:  26%|██████████████████████████████▋                                                                                         | 232/907 [16:59<54:19,  4.83s/it]

training loss: 2.8222243785858154
training loss: 2.8699254989624023
training loss: 2.8921096324920654
training loss: 2.869374990463257


training:  26%|███████████████████████████████▏                                                                                        | 236/907 [17:10<47:03,  4.21s/it]

training loss: 2.9097564220428467
training loss: 2.89322829246521
training loss: 2.8873445987701416
training loss: 2.8455758094787598
training loss: 2.8687851428985596
training loss: 2.8969664573669434


training:  27%|███████████████████████████████▉                                                                                        | 241/907 [17:24<42:20,  3.81s/it]

validation loss: 2.8959765434265137
training loss: 2.8470406532287598
training loss: 2.833634853363037
training loss: 2.8997600078582764


training:  27%|████████████████████████████████▍                                                                                       | 245/907 [17:44<46:10,  4.18s/it]

training loss: 2.8400936126708984
training loss: 2.878591775894165


training:  27%|████████████████████████████████▋                                                                                       | 247/907 [17:55<49:03,  4.46s/it]

training loss: 2.832298994064331
training loss: 2.952059745788574


training:  27%|████████████████████████████████▉                                                                                       | 249/907 [18:05<51:00,  4.65s/it]

training loss: 2.8535170555114746
training loss: 2.8878090381622314


training:  28%|█████████████████████████████████▏                                                                                      | 251/907 [18:15<52:39,  4.82s/it]

training loss: 2.854062557220459
training loss: 2.9026927947998047
training loss: 2.856018543243408


training:  28%|█████████████████████████████████▍                                                                                      | 253/907 [18:26<54:31,  5.00s/it]

validation loss: 2.8777408599853516
training loss: 2.89859938621521
training loss: 2.8498458862304688


training:  28%|█████████████████████████████████▊                                                                                      | 256/907 [18:41<53:50,  4.96s/it]

training loss: 2.8800292015075684
training loss: 2.8788039684295654
training loss: 2.894968271255493


training:  29%|██████████████████████████████████▎                                                                                     | 259/907 [18:55<52:56,  4.90s/it]

training loss: 2.888610601425171
training loss: 2.8855221271514893
training loss: 2.891526699066162


training:  29%|██████████████████████████████████▋                                                                                     | 262/907 [19:09<51:48,  4.82s/it]

training loss: 2.862816095352173
training loss: 2.868557929992676
training loss: 2.895094871520996


training:  29%|██████████████████████████████████▋                                                                                     | 262/907 [19:20<51:48,  4.82s/it]

training loss: 2.89449143409729


training:  29%|███████████████████████████████████                                                                                     | 265/907 [19:23<51:36,  4.82s/it]

validation loss: 2.827162981033325
training loss: 2.878175735473633
training loss: 2.8121302127838135


training:  30%|███████████████████████████████████▍                                                                                    | 268/907 [19:37<50:25,  4.73s/it]

training loss: 2.864042282104492
training loss: 2.9047882556915283
training loss: 2.864257574081421


training:  30%|███████████████████████████████████▊                                                                                    | 271/907 [19:50<49:09,  4.64s/it]

training loss: 2.9240164756774902
training loss: 2.8699984550476074
training loss: 2.9437859058380127


training:  30%|████████████████████████████████████▎                                                                                   | 274/907 [20:03<47:55,  4.54s/it]

training loss: 2.8424644470214844
training loss: 2.9083714485168457
training loss: 2.8215341567993164
training loss: 2.850032329559326


training:  31%|████████████████████████████████████▋                                                                                   | 277/907 [20:17<48:28,  4.62s/it]

validation loss: 2.9267728328704834
training loss: 2.9155519008636475
training loss: 2.8990159034729004


training:  31%|█████████████████████████████████████                                                                                   | 280/907 [20:31<48:28,  4.64s/it]

training loss: 2.8697078227996826
training loss: 2.8585658073425293
training loss: 2.8763234615325928


training:  31%|█████████████████████████████████████▍                                                                                  | 283/907 [20:46<48:52,  4.70s/it]

training loss: 2.8606321811676025
training loss: 2.915860176086426
training loss: 2.90108585357666


training:  32%|█████████████████████████████████████▊                                                                                  | 286/907 [21:00<48:59,  4.73s/it]

training loss: 2.872741937637329
training loss: 2.857574462890625
training loss: 2.8747220039367676
training loss: 2.929021120071411
validation loss: 2.841343641281128
training loss: 2.8708574771881104
training loss: 2.8772542476654053
training loss: 2.8312995433807373
training loss: 2.8750412464141846


training:  32%|██████████████████████████████████████▉                                                                                 | 294/907 [21:11<37:59,  3.72s/it]

training loss: 2.8869497776031494
training loss: 2.8764142990112305
training loss: 2.9085190296173096
training loss: 2.881186008453369
training loss: 2.890085220336914
training loss: 2.8821117877960205


training:  33%|███████████████████████████████████████▋                                                                                | 300/907 [21:30<35:51,  3.54s/it]

training loss: 2.8228774070739746
training loss: 2.8204283714294434
validation loss: 2.8434417247772217
training loss: 2.837344169616699
training loss: 2.869879722595215


training:  34%|████████████████████████████████████████▏                                                                               | 304/907 [21:48<38:17,  3.81s/it]

training loss: 2.888479709625244
training loss: 2.8950693607330322
training loss: 2.8151583671569824


training:  34%|████████████████████████████████████████▌                                                                               | 307/907 [22:00<38:50,  3.88s/it]

training loss: 2.8663909435272217
training loss: 2.855463743209839
training loss: 2.855863332748413


training:  34%|█████████████████████████████████████████                                                                               | 310/907 [22:14<40:48,  4.10s/it]

training loss: 2.8977773189544678
training loss: 2.8684799671173096
training loss: 2.8560609817504883
training loss: 2.9242613315582275


training:  35%|█████████████████████████████████████████▍                                                                              | 313/907 [22:29<43:30,  4.40s/it]

validation loss: 2.9640583992004395
training loss: 2.913010358810425
training loss: 2.8572089672088623


training:  35%|█████████████████████████████████████████▊                                                                              | 316/907 [22:42<42:51,  4.35s/it]

training loss: 2.901265859603882
training loss: 2.859739303588867
training loss: 2.915018320083618


training:  35%|██████████████████████████████████████████▏                                                                             | 319/907 [22:54<42:13,  4.31s/it]

training loss: 2.922494411468506
training loss: 2.847838878631592
training loss: 2.918611526489258


training:  36%|██████████████████████████████████████████▌                                                                             | 322/907 [23:08<42:09,  4.32s/it]

training loss: 2.8975865840911865
training loss: 2.8531055450439453
training loss: 2.8543460369110107
training loss: 2.897289514541626
validation loss: 2.8933799266815186
training loss: 2.8849940299987793


training:  36%|███████████████████████████████████████████▎                                                                            | 327/907 [23:19<35:43,  3.70s/it]

training loss: 2.908686876296997
training loss: 2.8619751930236816
training loss: 2.864166736602783
training loss: 2.843327760696411
training loss: 2.839959144592285
training loss: 2.890662670135498
training loss: 2.8657076358795166
training loss: 2.8686411380767822


training:  37%|████████████████████████████████████████████▎                                                                           | 335/907 [23:31<29:13,  3.07s/it]

training loss: 2.8689591884613037
training loss: 2.880427360534668
training loss: 2.8415963649749756
validation loss: 2.9056832790374756
training loss: 2.8941738605499268
training loss: 2.9067559242248535
training loss: 2.8391284942626953
training loss: 2.8817362785339355


training:  38%|█████████████████████████████████████████████▏                                                                          | 342/907 [23:50<27:48,  2.95s/it]

training loss: 2.8899998664855957
training loss: 2.912804126739502
training loss: 2.8806285858154297
training loss: 2.8863141536712646


training:  38%|█████████████████████████████████████████████▊                                                                          | 346/907 [24:01<26:59,  2.89s/it]

training loss: 2.8631887435913086
training loss: 2.8921408653259277
training loss: 2.844141721725464
training loss: 2.858126640319824
validation loss: 2.8721776008605957


training:  39%|██████████████████████████████████████████████▎                                                                         | 350/907 [24:12<26:08,  2.82s/it]

training loss: 2.8491268157958984
training loss: 2.904386520385742
training loss: 2.913787841796875
training loss: 2.9137041568756104


training:  39%|██████████████████████████████████████████████▊                                                                         | 354/907 [24:28<29:31,  3.20s/it]

training loss: 2.8258330821990967
training loss: 2.8753271102905273
training loss: 2.8449504375457764


training:  39%|███████████████████████████████████████████████▏                                                                        | 357/907 [24:41<32:00,  3.49s/it]

training loss: 2.900036096572876
training loss: 2.878668785095215
training loss: 2.9354889392852783


training:  40%|███████████████████████████████████████████████▋                                                                        | 360/907 [24:53<33:04,  3.63s/it]

training loss: 2.8643910884857178
training loss: 2.8700714111328125
validation loss: 2.9300715923309326
training loss: 2.855578660964966


training:  40%|████████████████████████████████████████████████                                                                        | 363/907 [25:04<33:28,  3.69s/it]

training loss: 2.8833487033843994
training loss: 2.8174567222595215
training loss: 2.851900100708008
training loss: 2.9095070362091064


training:  40%|████████████████████████████████████████████████▌                                                                       | 367/907 [25:18<32:25,  3.60s/it]

training loss: 2.8948042392730713
training loss: 2.8620409965515137
training loss: 2.8242413997650146


training:  41%|████████████████████████████████████████████████▉                                                                       | 370/907 [25:30<33:13,  3.71s/it]

training loss: 2.8881843090057373
training loss: 2.899848699569702
training loss: 2.8675334453582764


training:  41%|████████████████████████████████████████████████▉                                                                       | 370/907 [25:40<33:13,  3.71s/it]

training loss: 2.8872902393341064


training:  41%|█████████████████████████████████████████████████▎                                                                      | 373/907 [25:41<33:36,  3.78s/it]

validation loss: 2.940242052078247
training loss: 2.8961451053619385
training loss: 2.8592331409454346


training:  41%|█████████████████████████████████████████████████▋                                                                      | 376/907 [25:53<33:55,  3.83s/it]

training loss: 2.8554463386535645
training loss: 2.8600385189056396
training loss: 2.852109909057617


training:  42%|██████████████████████████████████████████████████▏                                                                     | 379/907 [26:05<33:33,  3.81s/it]

training loss: 2.903067111968994
training loss: 2.86142897605896
training loss: 2.893322706222534


training:  42%|██████████████████████████████████████████████████▌                                                                     | 382/907 [26:16<33:08,  3.79s/it]

training loss: 2.8582730293273926
training loss: 2.890244245529175
training loss: 2.863830804824829
training loss: 2.8770592212677


training:  42%|██████████████████████████████████████████████████▉                                                                     | 385/907 [26:29<34:53,  4.01s/it]

validation loss: 2.8359768390655518
training loss: 2.886054039001465
training loss: 2.8895373344421387
training loss: 2.882462978363037
training loss: 2.889650821685791
training loss: 2.866955041885376
training loss: 2.8803727626800537


training:  43%|███████████████████████████████████████████████████▊                                                                    | 392/907 [26:41<28:16,  3.29s/it]

training loss: 2.8698198795318604
training loss: 2.8679068088531494
training loss: 2.898560047149658
training loss: 2.8475840091705322
training loss: 2.8788530826568604
training loss: 2.893732786178589
validation loss: 2.8798043727874756
training loss: 2.9262940883636475


training:  44%|████████████████████████████████████████████████████▊                                                                   | 399/907 [26:51<23:11,  2.74s/it]

training loss: 2.887251377105713
training loss: 2.828066825866699
training loss: 2.8903684616088867
training loss: 2.8543684482574463
training loss: 2.8866329193115234
training loss: 2.855607748031616
training loss: 2.881338596343994


training:  45%|█████████████████████████████████████████████████████▋                                                                  | 406/907 [27:01<19:35,  2.35s/it]

training loss: 2.8902175426483154
training loss: 2.875920534133911
training loss: 2.8750298023223877
training loss: 2.870751142501831
validation loss: 2.9705724716186523
training loss: 2.882329225540161
training loss: 2.908268690109253
training loss: 2.864016056060791


training:  46%|██████████████████████████████████████████████████████▋                                                                 | 413/907 [27:11<17:06,  2.08s/it]

training loss: 2.893563985824585
training loss: 2.8795242309570312
training loss: 2.822312831878662
training loss: 2.871554374694824
training loss: 2.859285831451416
training loss: 2.8967490196228027
training loss: 2.794935464859009


training:  46%|███████████████████████████████████████████████████████▌                                                                | 420/907 [27:28<17:52,  2.20s/it]

training loss: 2.8370208740234375
training loss: 2.90793514251709
validation loss: 2.9050393104553223
training loss: 2.8872270584106445
training loss: 2.8702902793884277
training loss: 2.8605797290802


training:  47%|████████████████████████████████████████████████████████▏                                                               | 425/907 [27:41<18:25,  2.29s/it]

training loss: 2.8286709785461426
training loss: 2.8813023567199707
training loss: 2.9014017581939697
training loss: 2.8681561946868896


training:  47%|████████████████████████████████████████████████████████▊                                                               | 429/907 [27:54<20:35,  2.58s/it]

training loss: 2.8475704193115234
training loss: 2.88392972946167
training loss: 2.8567283153533936
training loss: 2.9141149520874023


training:  47%|████████████████████████████████████████████████████████▊                                                               | 429/907 [28:10<20:35,  2.58s/it]

training loss: 2.885585308074951


training:  48%|█████████████████████████████████████████████████████████▎                                                              | 433/907 [28:13<25:48,  3.27s/it]

validation loss: 2.9371254444122314
training loss: 2.890662670135498
training loss: 2.9071109294891357


training:  48%|█████████████████████████████████████████████████████████▋                                                              | 436/907 [28:28<29:19,  3.73s/it]

training loss: 2.834111452102661
training loss: 2.887244462966919
training loss: 2.8884825706481934


training:  48%|██████████████████████████████████████████████████████████                                                              | 439/907 [28:42<31:40,  4.06s/it]

training loss: 2.87544846534729
training loss: 2.888986587524414
training loss: 2.9058585166931152


training:  49%|██████████████████████████████████████████████████████████▍                                                             | 442/907 [28:57<33:22,  4.31s/it]

training loss: 2.8888132572174072
training loss: 2.8779420852661133
training loss: 2.8523271083831787


training:  49%|██████████████████████████████████████████████████████████▍                                                             | 442/907 [29:10<33:22,  4.31s/it]

training loss: 2.86381459236145


training:  49%|██████████████████████████████████████████████████████████▉                                                             | 445/907 [29:11<33:55,  4.41s/it]

validation loss: 2.8262693881988525
training loss: 2.847694158554077
training loss: 2.8603742122650146


training:  49%|███████████████████████████████████████████████████████████▎                                                            | 448/907 [29:25<34:13,  4.47s/it]

training loss: 2.833115577697754
training loss: 2.8361330032348633
training loss: 2.9180409908294678


training:  50%|███████████████████████████████████████████████████████████▋                                                            | 451/907 [29:38<34:05,  4.49s/it]

training loss: 2.964952230453491
training loss: 2.8526792526245117
training loss: 2.869189977645874


training:  50%|████████████████████████████████████████████████████████████                                                            | 454/907 [29:52<33:41,  4.46s/it]

training loss: 2.882868766784668
training loss: 2.8766281604766846
training loss: 2.9105937480926514
training loss: 2.827897548675537


training:  50%|████████████████████████████████████████████████████████████▍                                                           | 457/907 [30:05<33:13,  4.43s/it]

validation loss: 2.8834338188171387
training loss: 2.858764886856079
training loss: 2.8508169651031494


training:  51%|████████████████████████████████████████████████████████████▊                                                           | 460/907 [30:19<33:26,  4.49s/it]

training loss: 2.8668806552886963
training loss: 2.8781814575195312
training loss: 2.8781495094299316


training:  51%|█████████████████████████████████████████████████████████████▎                                                          | 463/907 [30:31<32:34,  4.40s/it]

training loss: 2.8162903785705566
training loss: 2.913775682449341
training loss: 2.86868953704834


training:  51%|█████████████████████████████████████████████████████████████▋                                                          | 466/907 [30:44<32:15,  4.39s/it]

training loss: 2.9176783561706543
training loss: 2.862811803817749
training loss: 2.861011028289795
training loss: 2.8606488704681396


training:  52%|██████████████████████████████████████████████████████████████                                                          | 469/907 [30:58<32:39,  4.47s/it]

validation loss: 2.864964246749878
training loss: 2.8931097984313965
training loss: 2.864898204803467


training:  52%|██████████████████████████████████████████████████████████████▍                                                         | 472/907 [31:11<32:18,  4.46s/it]

training loss: 2.91726016998291
training loss: 2.92714262008667
training loss: 2.8520567417144775


training:  52%|██████████████████████████████████████████████████████████████▊                                                         | 475/907 [31:25<32:13,  4.48s/it]

training loss: 2.887478828430176
training loss: 2.856241464614868
training loss: 2.9013924598693848


training:  53%|███████████████████████████████████████████████████████████████▏                                                        | 478/907 [31:39<32:05,  4.49s/it]

training loss: 2.865023374557495
training loss: 2.9519054889678955
training loss: 2.8637378215789795


training:  53%|███████████████████████████████████████████████████████████████▏                                                        | 478/907 [31:50<32:05,  4.49s/it]

training loss: 2.8650646209716797


training:  53%|███████████████████████████████████████████████████████████████▋                                                        | 481/907 [31:52<31:59,  4.51s/it]

validation loss: 2.9106712341308594
training loss: 2.9249258041381836
training loss: 2.868375301361084


training:  53%|████████████████████████████████████████████████████████████████                                                        | 484/907 [32:06<31:56,  4.53s/it]

training loss: 2.869351387023926
training loss: 2.913886785507202
training loss: 2.9333598613739014


training:  54%|████████████████████████████████████████████████████████████████▍                                                       | 487/907 [32:19<31:30,  4.50s/it]

training loss: 2.851707696914673
training loss: 2.9207868576049805
training loss: 3.120990753173828


training:  54%|████████████████████████████████████████████████████████████████▊                                                       | 490/907 [32:33<31:45,  4.57s/it]

training loss: 2.882324457168579
training loss: 2.8953399658203125
training loss: 2.8885700702667236
training loss: 2.9434311389923096


training:  54%|█████████████████████████████████████████████████████████████████▏                                                      | 493/907 [32:45<30:12,  4.38s/it]

validation loss: 2.85077166557312
training loss: 2.881809949874878
training loss: 2.927461624145508


training:  55%|█████████████████████████████████████████████████████████████████▌                                                      | 496/907 [32:58<29:42,  4.34s/it]

training loss: 2.928020715713501
training loss: 2.905006170272827
training loss: 3.0002634525299072


training:  55%|██████████████████████████████████████████████████████████████████                                                      | 499/907 [33:12<30:02,  4.42s/it]

training loss: 2.9177610874176025
training loss: 2.848245620727539
training loss: 2.86922550201416


training:  55%|██████████████████████████████████████████████████████████████████▍                                                     | 502/907 [33:26<30:14,  4.48s/it]

training loss: 2.885958194732666
training loss: 2.8776490688323975
training loss: 2.8838303089141846


training:  55%|██████████████████████████████████████████████████████████████████▍                                                     | 502/907 [33:40<30:14,  4.48s/it]

training loss: 2.886319160461426


training:  56%|██████████████████████████████████████████████████████████████████▊                                                     | 505/907 [33:41<31:08,  4.65s/it]

validation loss: 2.915534019470215
training loss: 2.8846726417541504
training loss: 2.8539791107177734


training:  56%|███████████████████████████████████████████████████████████████████▏                                                    | 508/907 [33:54<30:30,  4.59s/it]

training loss: 2.9065380096435547
training loss: 2.9015188217163086
training loss: 2.8494107723236084


training:  56%|███████████████████████████████████████████████████████████████████▌                                                    | 511/907 [34:08<30:37,  4.64s/it]

training loss: 2.8653810024261475
training loss: 2.8878989219665527
training loss: 2.8583099842071533


training:  57%|████████████████████████████████████████████████████████████████████                                                    | 514/907 [34:22<30:10,  4.61s/it]

training loss: 2.919313669204712
training loss: 2.89670467376709
training loss: 2.9029533863067627


training:  57%|████████████████████████████████████████████████████████████████████▍                                                   | 517/907 [34:33<28:25,  4.37s/it]

training loss: 2.8656117916107178
validation loss: 2.9076130390167236
training loss: 2.8920140266418457
training loss: 2.8710720539093018
training loss: 2.902401924133301
training loss: 2.8946027755737305
training loss: 2.880626678466797
training loss: 2.889674186706543
training loss: 2.863906145095825


training:  58%|█████████████████████████████████████████████████████████████████████▍                                                  | 525/907 [34:46<22:22,  3.51s/it]

training loss: 2.9186270236968994
training loss: 2.943713665008545
training loss: 2.903195381164551
training loss: 2.8369545936584473
training loss: 2.870077610015869
validation loss: 2.8676228523254395
training loss: 2.9126389026641846


training:  59%|██████████████████████████████████████████████████████████████████████▎                                                 | 531/907 [35:01<20:22,  3.25s/it]

training loss: 2.916738271713257
training loss: 2.865615129470825
training loss: 2.8995003700256348
training loss: 2.8592584133148193


training:  59%|██████████████████████████████████████████████████████████████████████▊                                                 | 535/907 [35:15<20:32,  3.31s/it]

training loss: 2.8491928577423096
training loss: 2.9184834957122803
training loss: 2.8424465656280518


training:  59%|███████████████████████████████████████████████████████████████████████▏                                                | 538/907 [35:27<21:33,  3.50s/it]

training loss: 2.909137487411499
training loss: 2.8558566570281982
training loss: 2.881761312484741
training loss: 2.879148483276367


training:  60%|███████████████████████████████████████████████████████████████████████▌                                                | 541/907 [35:38<21:44,  3.57s/it]

validation loss: 2.8547556400299072
training loss: 2.8337016105651855
training loss: 2.878631591796875


training:  60%|███████████████████████████████████████████████████████████████████████▉                                                | 544/907 [35:50<22:06,  3.65s/it]

training loss: 2.9045257568359375
training loss: 2.903195381164551
training loss: 2.876098871231079


training:  60%|████████████████████████████████████████████████████████████████████████▎                                               | 547/907 [36:02<22:47,  3.80s/it]

training loss: 2.874620199203491
training loss: 2.820894479751587
training loss: 2.899333953857422


training:  61%|████████████████████████████████████████████████████████████████████████▊                                               | 550/907 [36:16<23:58,  4.03s/it]

training loss: 2.846684217453003
training loss: 2.8738741874694824
training loss: 2.8924307823181152
training loss: 2.8854446411132812


training:  61%|█████████████████████████████████████████████████████████████████████████▏                                              | 553/907 [36:29<24:33,  4.16s/it]

validation loss: 2.8723695278167725
training loss: 2.8609704971313477
training loss: 2.821268320083618
training loss: 2.8587546348571777
training loss: 2.8643929958343506


training:  62%|█████████████████████████████████████████████████████████████████████████▊                                              | 558/907 [36:42<21:15,  3.66s/it]

training loss: 2.9232115745544434
training loss: 2.882964849472046
training loss: 2.8467845916748047
training loss: 2.870757818222046
training loss: 2.85896372795105


training:  62%|██████████████████████████████████████████████████████████████████████████▍                                             | 563/907 [37:03<21:57,  3.83s/it]

training loss: 2.857987642288208
training loss: 2.8118162155151367
training loss: 2.8246970176696777
validation loss: 2.8797245025634766


training:  62%|██████████████████████████████████████████████████████████████████████████▉                                             | 566/907 [37:16<22:58,  4.04s/it]

training loss: 2.8686649799346924
training loss: 2.875058889389038
training loss: 2.830554485321045


training:  63%|███████████████████████████████████████████████████████████████████████████▎                                            | 569/907 [37:30<23:24,  4.15s/it]

training loss: 2.8177947998046875
training loss: 2.848497152328491
training loss: 2.8231489658355713


training:  63%|███████████████████████████████████████████████████████████████████████████▋                                            | 572/907 [37:44<24:11,  4.33s/it]

training loss: 2.925518274307251
training loss: 2.858217716217041
training loss: 2.870652914047241


training:  63%|████████████████████████████████████████████████████████████████████████████                                            | 575/907 [38:00<25:41,  4.64s/it]

training loss: 2.8860630989074707
training loss: 2.8784239292144775
training loss: 2.867530345916748


training:  64%|████████████████████████████████████████████████████████████████████████████▎                                           | 577/907 [38:10<26:28,  4.81s/it]

validation loss: 2.896383762359619
training loss: 2.864405393600464
training loss: 2.8755359649658203


training:  64%|████████████████████████████████████████████████████████████████████████████▋                                           | 580/907 [38:23<25:12,  4.62s/it]

training loss: 2.8590519428253174
training loss: 2.9089982509613037
training loss: 2.8981170654296875


training:  64%|█████████████████████████████████████████████████████████████████████████████▏                                          | 583/907 [38:36<24:35,  4.55s/it]

training loss: 2.865684986114502
training loss: 2.877572774887085
training loss: 2.8496458530426025


training:  65%|█████████████████████████████████████████████████████████████████████████████▌                                          | 586/907 [38:51<24:54,  4.65s/it]

training loss: 2.8513855934143066
training loss: 2.8566524982452393
training loss: 2.8476836681365967
training loss: 2.8788998126983643


training:  65%|█████████████████████████████████████████████████████████████████████████████▉                                          | 589/907 [39:04<24:10,  4.56s/it]

validation loss: 2.896632194519043
training loss: 2.902996778488159
training loss: 2.9008660316467285


training:  65%|██████████████████████████████████████████████████████████████████████████████▎                                         | 592/907 [39:17<23:29,  4.47s/it]

training loss: 2.882322072982788
training loss: 2.9115123748779297
training loss: 2.9150407314300537


training:  66%|██████████████████████████████████████████████████████████████████████████████▋                                         | 595/907 [39:29<22:48,  4.39s/it]

training loss: 2.9023807048797607
training loss: 2.890591859817505
training loss: 2.8763136863708496


training:  66%|███████████████████████████████████████████████████████████████████████████████                                         | 598/907 [39:41<21:59,  4.27s/it]

training loss: 2.908001661300659
training loss: 2.878028154373169
training loss: 2.9349911212921143
training loss: 2.826775550842285


training:  66%|███████████████████████████████████████████████████████████████████████████████▌                                        | 601/907 [39:56<22:55,  4.49s/it]

validation loss: 2.893043041229248
training loss: 2.8819572925567627
training loss: 2.9435489177703857


training:  67%|███████████████████████████████████████████████████████████████████████████████▉                                        | 604/907 [40:09<22:04,  4.37s/it]

training loss: 2.9084179401397705
training loss: 2.8456292152404785
training loss: 2.9028420448303223


training:  67%|████████████████████████████████████████████████████████████████████████████████▎                                       | 607/907 [40:22<22:02,  4.41s/it]

training loss: 2.8468737602233887
training loss: 2.8495359420776367
training loss: 2.8775105476379395


training:  67%|████████████████████████████████████████████████████████████████████████████████▋                                       | 610/907 [40:36<22:21,  4.52s/it]

training loss: 2.863144874572754
training loss: 2.9254791736602783
training loss: 2.8832380771636963


training:  67%|████████████████████████████████████████████████████████████████████████████████▋                                       | 610/907 [40:50<22:21,  4.52s/it]

training loss: 2.84183669090271


training:  68%|█████████████████████████████████████████████████████████████████████████████████                                       | 613/907 [40:51<22:27,  4.58s/it]

validation loss: 2.886873483657837
training loss: 2.897953510284424
training loss: 2.8975670337677


training:  68%|█████████████████████████████████████████████████████████████████████████████████▍                                      | 616/907 [41:04<21:57,  4.53s/it]

training loss: 2.8919999599456787
training loss: 2.883838653564453
training loss: 2.906280755996704


training:  68%|█████████████████████████████████████████████████████████████████████████████████▉                                      | 619/907 [41:18<22:00,  4.58s/it]

training loss: 2.860811233520508
training loss: 2.9162797927856445
training loss: 2.8866207599639893


training:  69%|██████████████████████████████████████████████████████████████████████████████████▎                                     | 622/907 [41:31<21:24,  4.51s/it]

training loss: 2.8717846870422363
training loss: 2.884702682495117
training loss: 2.857499599456787
training loss: 2.880913257598877


training:  69%|██████████████████████████████████████████████████████████████████████████████████▋                                     | 625/907 [41:44<21:12,  4.51s/it]

validation loss: 2.8561835289001465
training loss: 2.8604519367218018
training loss: 2.892979383468628


training:  69%|███████████████████████████████████████████████████████████████████████████████████                                     | 628/907 [41:57<20:35,  4.43s/it]

training loss: 2.918996572494507
training loss: 2.9089136123657227
training loss: 2.882858991622925


training:  70%|███████████████████████████████████████████████████████████████████████████████████▍                                    | 631/907 [42:09<19:41,  4.28s/it]

training loss: 2.846432685852051
training loss: 2.94325590133667
training loss: 2.925734519958496
training loss: 2.877075672149658
training loss: 2.917940378189087
training loss: 2.85793399810791


training:  70%|████████████████████████████████████████████████████████████████████████████████████▎                                   | 637/907 [42:20<16:01,  3.56s/it]

training loss: 2.887071132659912
validation loss: 2.878310203552246
training loss: 2.8437328338623047
training loss: 2.8935534954071045
training loss: 2.8703300952911377
training loss: 2.8769805431365967
training loss: 2.875805616378784
training loss: 2.850192070007324
training loss: 2.86153507232666


training:  71%|█████████████████████████████████████████████████████████████████████████████████████▎                                  | 645/907 [42:31<12:41,  2.91s/it]

training loss: 2.826817274093628
training loss: 2.8636083602905273
training loss: 2.916342258453369
training loss: 2.852121114730835
training loss: 2.8116965293884277
validation loss: 2.9041640758514404
training loss: 2.8830385208129883
training loss: 2.945409059524536
training loss: 2.8364696502685547


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▍                                 | 653/907 [42:47<11:07,  2.63s/it]

training loss: 2.8348445892333984
training loss: 2.884765386581421
training loss: 2.845005750656128
training loss: 2.922264814376831


training:  72%|██████████████████████████████████████████████████████████████████████████████████████▍                                 | 653/907 [43:00<11:07,  2.63s/it]

training loss: 2.9026646614074707


training:  73%|███████████████████████████████████████████████████████████████████████████████████████                                 | 658/907 [43:03<11:38,  2.80s/it]

training loss: 2.836418867111206
training loss: 2.8655824661254883
training loss: 2.906766891479492
training loss: 2.8710498809814453
validation loss: 2.843404769897461


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▌                                | 662/907 [43:18<12:39,  3.10s/it]

training loss: 2.8710594177246094
training loss: 2.8540265560150146
training loss: 2.873579740524292


training:  73%|███████████████████████████████████████████████████████████████████████████████████████▉                                | 665/907 [43:32<14:19,  3.55s/it]

training loss: 2.8758411407470703
training loss: 2.8809616565704346
training loss: 2.8616130352020264


training:  74%|████████████████████████████████████████████████████████████████████████████████████████▍                               | 668/907 [43:46<15:35,  3.92s/it]

training loss: 2.849479913711548
training loss: 2.8989334106445312
training loss: 2.845618724822998


training:  74%|████████████████████████████████████████████████████████████████████████████████████████▊                               | 671/907 [44:01<16:20,  4.16s/it]

training loss: 2.8976540565490723
training loss: 2.8544468879699707
training loss: 2.823943853378296
validation loss: 2.9080982208251953


training:  74%|█████████████████████████████████████████████████████████████████████████████████████████▏                              | 674/907 [44:12<15:46,  4.06s/it]

training loss: 2.854076862335205
training loss: 2.858041286468506
training loss: 2.86275053024292


training:  75%|█████████████████████████████████████████████████████████████████████████████████████████▌                              | 677/907 [44:23<15:00,  3.92s/it]

training loss: 2.9046530723571777
training loss: 2.9011073112487793
training loss: 2.9025890827178955


training:  75%|█████████████████████████████████████████████████████████████████████████████████████████▉                              | 680/907 [44:33<14:15,  3.77s/it]

training loss: 2.8646576404571533
training loss: 2.8596155643463135
training loss: 2.8344640731811523
training loss: 2.8669893741607666


training:  75%|██████████████████████████████████████████████████████████████████████████████████████████▍                             | 684/907 [44:47<13:34,  3.65s/it]

training loss: 2.890030860900879
training loss: 2.8693714141845703
validation loss: 2.8715505599975586
training loss: 2.882962226867676


training:  76%|██████████████████████████████████████████████████████████████████████████████████████████▉                             | 687/907 [44:58<13:32,  3.69s/it]

training loss: 2.899507999420166
training loss: 2.891417980194092
training loss: 2.849104642868042


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████▎                            | 690/907 [45:08<13:06,  3.62s/it]

training loss: 2.9372806549072266
training loss: 2.890922784805298
training loss: 2.8581135272979736


training:  76%|███████████████████████████████████████████████████████████████████████████████████████████▋                            | 693/907 [45:20<13:09,  3.69s/it]

training loss: 2.80143666267395
training loss: 2.854996919631958
training loss: 2.932917356491089


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████                            | 696/907 [45:32<13:20,  3.79s/it]

training loss: 2.8597657680511475
training loss: 2.8472280502319336
validation loss: 2.899170398712158
training loss: 2.8059163093566895


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▍                           | 699/907 [45:44<13:22,  3.86s/it]

training loss: 2.8862595558166504
training loss: 2.869570255279541
training loss: 2.8422200679779053


training:  77%|████████████████████████████████████████████████████████████████████████████████████████████▉                           | 702/907 [45:55<13:02,  3.82s/it]

training loss: 2.874800443649292
training loss: 2.866142988204956
training loss: 2.847135066986084


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▎                          | 705/907 [46:07<12:59,  3.86s/it]

training loss: 2.912623405456543
training loss: 2.8997843265533447
training loss: 2.8524093627929688


training:  78%|█████████████████████████████████████████████████████████████████████████████████████████████▋                          | 708/907 [46:19<12:49,  3.87s/it]

training loss: 2.865536689758301
training loss: 2.874006748199463
validation loss: 2.8721375465393066
training loss: 2.870612144470215


training:  78%|██████████████████████████████████████████████████████████████████████████████████████████████                          | 711/907 [46:30<12:38,  3.87s/it]

training loss: 2.898078680038452
training loss: 2.929185390472412
training loss: 2.889901638031006
training loss: 2.8891053199768066
training loss: 2.8228602409362793
training loss: 2.8599441051483154


training:  79%|██████████████████████████████████████████████████████████████████████████████████████████████▊                         | 717/907 [46:42<10:22,  3.28s/it]

training loss: 2.842900037765503
training loss: 2.8940579891204834
training loss: 2.922572374343872
training loss: 2.8756885528564453
training loss: 2.888852834701538
validation loss: 2.8578579425811768
training loss: 2.8839075565338135
training loss: 2.8716611862182617


training:  80%|███████████████████████████████████████████████████████████████████████████████████████████████▊                        | 724/907 [46:52<08:18,  2.72s/it]

training loss: 2.8831324577331543
training loss: 2.8886775970458984
training loss: 2.8789522647857666
training loss: 2.880556344985962
training loss: 2.8908021450042725
training loss: 2.8942997455596924
training loss: 2.899980068206787


training:  81%|████████████████████████████████████████████████████████████████████████████████████████████████▋                       | 731/907 [47:03<07:03,  2.40s/it]

training loss: 2.8580636978149414
training loss: 2.853888988494873
training loss: 2.8793458938598633
validation loss: 2.8746140003204346
training loss: 2.8821985721588135
training loss: 2.89595365524292
training loss: 2.8385300636291504


training:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████▌                      | 737/907 [47:23<07:33,  2.67s/it]

training loss: 2.908374309539795
training loss: 2.845675468444824
training loss: 2.8789825439453125
training loss: 2.867452621459961


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████                      | 741/907 [47:39<08:34,  3.10s/it]

training loss: 2.8183624744415283
training loss: 2.9021973609924316
training loss: 2.865872621536255


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▍                     | 744/907 [47:53<09:36,  3.54s/it]

training loss: 2.914268970489502
training loss: 2.8461687564849854
validation loss: 2.8543624877929688
training loss: 2.8729045391082764


training:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████▊                     | 747/907 [48:05<09:51,  3.70s/it]

training loss: 2.873202085494995
training loss: 2.832882881164551
training loss: 2.88989520072937


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▏                    | 750/907 [48:16<09:40,  3.70s/it]

training loss: 2.9083685874938965
training loss: 2.9060821533203125
training loss: 2.864264488220215


training:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████▋                    | 753/907 [48:28<09:29,  3.70s/it]

training loss: 2.914217948913574
training loss: 2.862208843231201
training loss: 2.933953285217285


training:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████                    | 756/907 [48:39<09:18,  3.70s/it]

training loss: 2.8915998935699463
training loss: 2.8732120990753174
validation loss: 2.8723108768463135
training loss: 2.8756046295166016


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▍                   | 759/907 [48:50<09:11,  3.73s/it]

training loss: 2.935192108154297
training loss: 2.849058151245117
training loss: 2.9281558990478516


training:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████▊                   | 762/907 [49:01<09:00,  3.72s/it]

training loss: 2.8834614753723145
training loss: 2.833667039871216
training loss: 2.9155733585357666


training:  84%|█████████████████████████████████████████████████████████████████████████████████████████████████████▏                  | 765/907 [49:12<08:36,  3.64s/it]

training loss: 2.901116371154785
training loss: 2.8910772800445557
training loss: 2.8415935039520264


training:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████▌                  | 768/907 [49:23<08:26,  3.65s/it]

training loss: 2.8934125900268555
training loss: 2.891008138656616
validation loss: 2.895501136779785
training loss: 2.8524880409240723


training:  85%|██████████████████████████████████████████████████████████████████████████████████████████████████████                  | 771/907 [49:34<08:21,  3.69s/it]

training loss: 2.911722421646118
training loss: 2.883280038833618
training loss: 2.8372037410736084


training:  85%|██████████████████████████████████████████████████████████████████████████████████████████████████████▍                 | 774/907 [49:45<08:09,  3.68s/it]

training loss: 2.8385403156280518
training loss: 2.8651602268218994
training loss: 2.899003505706787


training:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                 | 777/907 [49:56<07:59,  3.69s/it]

training loss: 2.863534927368164
training loss: 2.8228702545166016
training loss: 2.887927770614624


training:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████▏                | 780/907 [50:07<07:46,  3.68s/it]

training loss: 2.884708881378174
training loss: 2.871018648147583
validation loss: 2.9025607109069824
training loss: 2.8478028774261475


training:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████▌                | 783/907 [50:18<07:38,  3.70s/it]

training loss: 2.927544355392456
training loss: 2.87145733833313
training loss: 2.9666407108306885


training:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████▉                | 786/907 [50:29<07:27,  3.70s/it]

training loss: 2.883655548095703
training loss: 2.872763156890869
training loss: 2.907305955886841


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▍               | 789/907 [50:42<07:31,  3.82s/it]

training loss: 2.9188754558563232
training loss: 2.8958017826080322
training loss: 2.8949697017669678


training:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████▊               | 792/907 [50:54<07:32,  3.93s/it]

training loss: 2.8804097175598145
training loss: 2.962545871734619
validation loss: 2.8868556022644043
training loss: 2.8635921478271484


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▏              | 795/907 [51:07<07:33,  4.05s/it]

training loss: 2.8774216175079346
training loss: 2.881699323654175
training loss: 2.87518310546875


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▌              | 798/907 [51:20<07:30,  4.14s/it]

training loss: 2.852290630340576
training loss: 2.9111087322235107
training loss: 2.8719027042388916


training:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉              | 801/907 [51:33<07:26,  4.21s/it]

training loss: 2.9334890842437744
training loss: 2.903684377670288
training loss: 2.885504961013794


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▎             | 804/907 [51:47<07:22,  4.30s/it]

training loss: 2.873506784439087
training loss: 2.867487907409668
validation loss: 2.9073588848114014
training loss: 2.894718885421753


training:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▊             | 807/907 [52:01<07:24,  4.44s/it]

training loss: 2.858785629272461
training loss: 2.887981414794922
training loss: 2.865452289581299


training:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏            | 810/907 [52:15<07:17,  4.51s/it]

training loss: 2.895205020904541
training loss: 2.8762011528015137
training loss: 2.8798277378082275


training:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌            | 813/907 [52:28<07:02,  4.49s/it]

training loss: 2.882986068725586
training loss: 2.8951945304870605
training loss: 2.8523752689361572


training:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▉            | 816/907 [52:41<06:44,  4.44s/it]

training loss: 2.8718764781951904
training loss: 2.9116480350494385
validation loss: 2.8819079399108887
training loss: 2.8454980850219727


training:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▎           | 819/907 [52:55<06:36,  4.50s/it]

training loss: 2.8758761882781982
training loss: 2.883949041366577
training loss: 2.8286237716674805


training:  91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▊           | 822/907 [53:08<06:15,  4.42s/it]

training loss: 2.8707330226898193
training loss: 2.9066720008850098
training loss: 2.884587526321411


training:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▏          | 825/907 [53:21<05:58,  4.37s/it]

training loss: 2.905449867248535
training loss: 2.8960654735565186
training loss: 2.8988683223724365
training loss: 2.839106321334839
training loss: 2.8842060565948486
validation loss: 2.9051575660705566


training:  92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▊          | 830/907 [53:34<04:58,  3.87s/it]

training loss: 2.892972946166992
training loss: 2.8395795822143555
training loss: 2.845407009124756
training loss: 2.891798496246338
training loss: 2.8481273651123047


training:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍         | 835/907 [53:44<03:58,  3.31s/it]

training loss: 2.8748414516448975
training loss: 2.8451688289642334
training loss: 2.9599223136901855
training loss: 2.8667490482330322
training loss: 2.9109201431274414
training loss: 2.8634586334228516
training loss: 2.8980367183685303


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▎        | 841/907 [53:56<03:11,  2.90s/it]

validation loss: 2.8843278884887695
training loss: 2.8874335289001465
training loss: 2.895174026489258
training loss: 2.8598642349243164
training loss: 2.8922781944274902


training:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▎        | 841/907 [54:10<03:11,  2.90s/it]

training loss: 2.8836212158203125


training:  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████        | 847/907 [54:12<02:50,  2.84s/it]

training loss: 2.9040257930755615
training loss: 2.8892109394073486
training loss: 2.882087469100952
training loss: 2.9090328216552734


training:  94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌       | 851/907 [54:23<02:37,  2.80s/it]

training loss: 2.8712549209594727
training loss: 2.8735268115997314
training loss: 2.8923070430755615
validation loss: 2.895904302597046
training loss: 2.8476788997650146


training:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████       | 855/907 [54:34<02:25,  2.79s/it]

training loss: 2.881732702255249
training loss: 2.8158681392669678
training loss: 2.866075038909912
training loss: 2.9168801307678223


training:  95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋      | 859/907 [54:45<02:13,  2.77s/it]

training loss: 2.8711652755737305
training loss: 2.920957326889038
training loss: 2.871673583984375
training loss: 2.9500515460968018


training:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏     | 863/907 [54:56<02:01,  2.76s/it]

training loss: 2.8421175479888916
training loss: 2.913248062133789
training loss: 2.838754415512085
validation loss: 2.8592581748962402
training loss: 2.940955638885498


training:  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋     | 867/907 [55:07<01:50,  2.77s/it]

training loss: 2.9256515502929688
training loss: 2.910043954849243
training loss: 2.871954917907715
training loss: 2.8496203422546387


training:  96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏    | 871/907 [55:18<01:39,  2.76s/it]

training loss: 2.8850932121276855
training loss: 2.8561689853668213
training loss: 2.9071052074432373
training loss: 2.9066085815429688


training:  96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊    | 875/907 [55:29<01:27,  2.75s/it]

training loss: 2.9016776084899902
training loss: 2.862398386001587
training loss: 2.8982152938842773
validation loss: 2.8797476291656494
training loss: 2.85992169380188


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎   | 879/907 [55:40<01:17,  2.76s/it]

training loss: 2.8784947395324707
training loss: 2.8828890323638916
training loss: 2.840324640274048
training loss: 2.882000684738159


training:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊   | 883/907 [55:51<01:05,  2.74s/it]

training loss: 2.895172595977783
training loss: 2.875412940979004
training loss: 2.9207966327667236
training loss: 2.876352310180664


training:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎  | 887/907 [56:02<00:54,  2.74s/it]

training loss: 2.888676404953003
training loss: 2.884965419769287
training loss: 2.825268507003784
validation loss: 2.863696813583374
training loss: 2.8499011993408203


training:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉  | 891/907 [56:13<00:44,  2.75s/it]

training loss: 2.8308217525482178
training loss: 2.8745803833007812
training loss: 2.883356809616089
training loss: 2.8965582847595215


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 895/907 [56:27<00:35,  2.98s/it]

training loss: 2.8056936264038086
training loss: 2.8738515377044678
training loss: 2.862097978591919


training:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 898/907 [56:42<00:32,  3.60s/it]

training loss: 2.8670225143432617
training loss: 2.9096808433532715
training loss: 2.8644065856933594
training loss: 2.8511962890625


training:  99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 901/907 [56:53<00:21,  3.56s/it]

validation loss: 2.8553528785705566
training loss: 2.965726852416992
training loss: 2.9269609451293945
training loss: 2.8579893112182617


training: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 905/907 [57:04<00:06,  3.31s/it]

training loss: 2.909682273864746
training loss: 2.860117197036743


training: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 907/907 [57:09<00:00,  3.78s/it]

training loss: 2.9044148921966553





In [12]:
import torch
x=torch.rand((3,2,5))
x

tensor([[[0.4644, 0.6206, 0.9698, 0.7029, 0.1203],
         [0.3409, 0.8936, 0.4948, 0.7733, 0.4185]],

        [[0.0241, 0.5471, 0.6334, 0.2244, 0.3367],
         [0.4312, 0.7499, 0.9411, 0.6488, 0.4058]],

        [[0.3744, 0.3766, 0.0417, 0.9009, 0.0848],
         [0.0123, 0.6774, 0.8277, 0.1902, 0.2619]]])

In [13]:
x[..., :-1, :]

tensor([[[0.4644, 0.6206, 0.9698, 0.7029, 0.1203]],

        [[0.0241, 0.5471, 0.6334, 0.2244, 0.3367]],

        [[0.3744, 0.3766, 0.0417, 0.9009, 0.0848]]])

In [14]:
x[...,1:]#.reshape(-1)

tensor([[[0.6206, 0.9698, 0.7029, 0.1203],
         [0.8936, 0.4948, 0.7733, 0.4185]],

        [[0.5471, 0.6334, 0.2244, 0.3367],
         [0.7499, 0.9411, 0.6488, 0.4058]],

        [[0.3766, 0.0417, 0.9009, 0.0848],
         [0.6774, 0.8277, 0.1902, 0.2619]]])