In [111]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import math
import copy

In [112]:
class MultiHeadAttention(nn.Module):
  """Multi-head scaled dot-product attention.

  Inputs are projected with separate linear layers for queries, keys and
  values, split into ``num_heads`` heads of width ``d_k = d_model // num_heads``,
  attended independently, re-combined and passed through an output projection.
  """
  def __init__(self,d_model,num_heads) -> None:
    """
    Args:
      d_model: width of the input/output features.
      num_heads: number of attention heads; must divide ``d_model``.

    Raises:
      ValueError: if ``d_model`` is not divisible by ``num_heads``.
    """
    super(MultiHeadAttention,self).__init__()
    # split_heads reshapes the last axis into (num_heads, d_k); that only
    # works when the division is exact, so fail early with a clear message.
    if d_model % num_heads != 0:
      raise ValueError(f"d_model ({d_model}) must be divisible by num_heads ({num_heads})")
    self.d_model=d_model
    self.num_heads=num_heads
    self.d_k=self.d_model//self.num_heads

    self.W_q=nn.Linear(d_model,d_model)
    self.W_k=nn.Linear(d_model,d_model)
    self.W_v=nn.Linear(d_model,d_model)
    self.W_o=nn.Linear(d_model,d_model) #output
  def scaled_dot_product_attention(self,Q,K,V, mask=None):
    """Return softmax(Q K^T / sqrt(d_k)) V.

    Args:
      Q, K, V: tensors whose last two axes are (sequence, d_k).
      mask: optional tensor broadcastable to the score matrix; positions
        where ``mask == 0`` are excluded from attention.
    """
    attn_scores=torch.matmul(Q,K.transpose(-2,-1))/math.sqrt(self.d_k)
    if mask is not None:
      # Assign back to attn_scores: a large negative score becomes a zero
      # weight after the softmax.
      attn_scores=attn_scores.masked_fill(mask==0,-1e9)
    attn_probs=torch.softmax(attn_scores, dim=-1)
    output=attn_probs@V
    return output
  def split_heads(self,x):
    """Reshape (batch, seq, d_model) into (batch, num_heads, seq, d_k)."""
    batch_size,seq_len,d_model=x.size()
    return x.view(batch_size,seq_len,self.num_heads,self.d_k).transpose(1,2)
  def combine_heads(self,x):
    """Inverse of split_heads: (batch, num_heads, seq, d_k) -> (batch, seq, d_model)."""
    batch_size,_,seq_len,d_k=x.size()
    return x.transpose(1,2).contiguous().view(batch_size,seq_len,self.d_model)
  def forward(self,Q,K,V,mask=None):
    """Project Q/K/V, attend per head (honouring ``mask``) and project the result.

    Returns a tensor with the batch and sequence axes of ``Q`` and width ``d_model``.
    """
    Q=self.split_heads(self.W_q(Q))
    K=self.split_heads(self.W_k(K))
    V=self.split_heads(self.W_v(V))
    # Forward the caller's mask; it must not be replaced by None here.
    attn_output = self.scaled_dot_product_attention(Q,K,V,mask=mask)

    output=self.W_o(self.combine_heads(attn_output))
    return output



In [113]:
class PositionWiseFeedForward(nn.Module):
  """Two-layer feed-forward network applied to the last axis: Linear -> ReLU -> Linear."""
  def __init__(self,d_model,d_ff):
    """
    Args:
      d_model: input and output width.
      d_ff: hidden width between the two linear layers.
    """
    super(PositionWiseFeedForward, self).__init__()
    self.fc_1=nn.Linear(d_model,d_ff)
    self.fc_2=nn.Linear(d_ff,d_model)
    # ReLU holds no parameters or buffers, so it needs no device placement;
    # the enclosing model's .to(device) moves the linear layers.
    self.relu=nn.ReLU()
  def forward(self,x):
    """Return fc_2(relu(fc_1(x)))."""
    return self.fc_2(self.relu(self.fc_1(x)))




In [114]:
class PositionalEncoding(nn.Module):
  """Adds fixed sinusoidal position encodings to a (batch, seq, d_model) input.

  The table is stored as the non-trainable buffer ``pe`` with shape
  (1, max_seq_len, d_model), so it follows the module through ``.to(...)``
  and is saved in the state dict.
  """
  def __init__(self,d_model,max_seq_len) -> None:
    """
    Args:
      d_model: feature width of the encoding.
      max_seq_len: number of positions to precompute.
    """
    super(PositionalEncoding,self).__init__()
    # Build on the default device; register_buffer makes the table move with
    # the module, so no global ``device`` is needed here.
    pe=torch.zeros(max_seq_len,d_model)
    position=torch.arange(0,max_seq_len,dtype=torch.float).unsqueeze(1)
    div_term=torch.exp(torch.arange(0,d_model,2).float()*(-math.log(10000.0)/d_model))
    pe[:,0::2]=torch.sin(position*div_term)
    # For odd d_model there is one fewer odd column than even columns, so
    # trim the frequency vector to match.
    pe[:,1::2]=torch.cos(position*div_term[:d_model//2])
    self.register_buffer("pe",pe.unsqueeze(0))
  def forward(self,x):
    """Return ``x`` plus the encodings for its first ``x.size(1)`` positions."""
    return x + self.pe[:,:x.size(1)]


In [115]:
class LayerNorma(nn.Module):
  """Hand-written layer normalisation over the last axis with scalar scale and shift.

  Computes ``alpha * (x - mean) / (std + eps) + bias`` where mean and std are
  taken over the last axis.
  """

  def __init__(self,epsilon=0.1):
        """
        Args:
          epsilon: constant added to the standard deviation before dividing.
        """
        super().__init__()
        self.eps=epsilon
        # Assign the nn.Parameter objects directly. Calling .to(device) on a
        # Parameter can return a plain tensor, which nn.Module would not
        # register, leaving alpha/bias out of parameters() and untrained.
        self.alpha=nn.Parameter(torch.ones(1))
        self.bias=nn.Parameter(torch.zeros(1))
  def forward(self,x):
        """Normalise ``x`` along its last axis and apply the learned scale and shift."""
        mean=x.mean(dim=-1,keepdim=True)
        std=x.std(dim=-1,keepdim=True)
        return self.alpha * (x-mean)/(std+self.eps)+self.bias

In [116]:

class EncoderLayer(nn.Module):
  """One encoder block: self-attention then feed-forward, each followed by
  dropout, a residual connection and layer normalisation."""
  def __init__(self,d_model,num_heads,d_ff,dropout):
    """
    Args:
      d_model: feature width.
      num_heads: number of attention heads.
      d_ff: hidden width of the feed-forward sub-layer.
      dropout: dropout probability applied to each sub-layer output.
    """
    super().__init__()
    self.multihead=MultiHeadAttention(d_model,num_heads)
    self.feedforward=PositionWiseFeedForward(d_model,d_ff)
    self.norm1=nn.LayerNorm(d_model)
    self.norm2=nn.LayerNorm(d_model)
    self.dropout=nn.Dropout(dropout)

  def forward(self,x,mask):
    """Run self-attention over ``x`` with ``mask``, then the feed-forward sub-layer."""
    # Sub-layer 1: self-attention with residual + norm.
    attended=self.dropout(self.multihead(x,x,x,mask))
    hidden=self.norm1(x+attended)
    # Sub-layer 2: position-wise feed-forward with residual + norm.
    transformed=self.dropout(self.feedforward(hidden))
    return self.norm2(hidden+transformed)


In [117]:
class DecoderLayer(nn.Module):
  """One decoder block: masked self-attention, cross-attention over the encoder
  output, then feed-forward. Each sub-layer is followed by dropout, a residual
  connection and layer normalisation."""
  def __init__(self,d_model,num_heads,d_ff,dropout):
    """
    Args:
      d_model: feature width.
      num_heads: number of attention heads.
      d_ff: hidden width of the feed-forward sub-layer.
      dropout: dropout probability applied to each sub-layer output.
    """
    super(DecoderLayer,self).__init__()
    self.selfattn=MultiHeadAttention(d_model,num_heads)
    self.crossattn=MultiHeadAttention(d_model,num_heads)
    self.feedforward=PositionWiseFeedForward(d_model,d_ff)
    self.norm1=nn.LayerNorm(d_model)
    self.norm2=nn.LayerNorm(d_model)
    self.norm3=nn.LayerNorm(d_model)
    self.dropout=nn.Dropout(dropout)
  def forward(self,x,enc_out,src_mask,tgt_mask):
    """
    Args:
      x: decoder-side input.
      enc_out: encoder output attended to by the cross-attention sub-layer.
      src_mask: mask applied in cross-attention.
      tgt_mask: mask applied in self-attention.
    """
    attn_output=self.selfattn(x,x,x,tgt_mask)
    x=self.norm1(x+self.dropout(attn_output))
    # Cross-attention: queries come from the decoder state, keys and values
    # from the encoder output. The result then has the decoder's sequence
    # length, as the residual add below requires.
    c_attn_out=self.crossattn(x,enc_out,enc_out,src_mask)
    x=self.norm2(x+self.dropout(c_attn_out))
    ff_out=self.feedforward(x)
    x=self.norm3(x+self.dropout(ff_out))
    return x

In [118]:

def causal_mask(size):
    """Return a (1, size, size) boolean mask that is True on and below the diagonal."""
    # Ones strictly above the diagonal mark the disallowed positions ...
    above_diagonal=torch.ones(1,size,size).triu(diagonal=1).to(torch.int)
    # ... and the mask is True wherever that marker is zero.
    return above_diagonal.eq(0)

In [119]:
class Transformer(nn.Module):
  """Encoder-decoder Transformer producing per-token log-probabilities over the
  target vocabulary."""
  def __init__(self,src_vocab_size,tgt_vocab_size,d_model,num_heads,num_layers,d_ff,max_seq_len,dropout,pad_token_id=1):
    """
    Args:
      src_vocab_size: size of the source embedding table.
      tgt_vocab_size: size of the target embedding table and output layer.
      d_model: feature width.
      num_heads: attention heads per layer.
      num_layers: number of encoder layers and of decoder layers.
      d_ff: hidden width of the feed-forward sub-layers.
      max_seq_len: number of positions precomputed by the positional encoding.
      dropout: dropout probability.
      pad_token_id: token id treated as padding when building masks
        (defaults to 1, the value previously hard-coded).
    """
    super(Transformer,self).__init__()
    self.pad_token_id=pad_token_id
    self.encoder_embedding=nn.Embedding(src_vocab_size,d_model)
    self.decoder_embedding=nn.Embedding(tgt_vocab_size,d_model)

    # Sub-modules are moved together with the model by Transformer(...).to(device).
    self.positional_encoding=PositionalEncoding(d_model,max_seq_len)
    self.encoder_layers=nn.ModuleList([EncoderLayer(d_model,num_heads,d_ff,dropout) for _ in  range(num_layers)])
    self.decoder_layers=nn.ModuleList([DecoderLayer(d_model,num_heads,d_ff,dropout) for _ in range(num_layers)])

    self.fc_layer=nn.Linear(d_model,tgt_vocab_size)
    self.dropout=nn.Dropout(dropout)

  def generate_mask(self,src,tgt):
    """Build the attention masks from (batch, seq) token-id tensors.

    Returns:
      src_mask: (batch, 1, 1, src_len) bool, False at padding tokens.
      tgt_mask: (batch, 1, tgt_len, tgt_len) bool, False at padding rows and
        at positions above the diagonal (future tokens).
    """
    src_mask=(src!=self.pad_token_id).unsqueeze(1).unsqueeze(2)
    tgt_mask=(tgt!=self.pad_token_id).unsqueeze(1).unsqueeze(3)
    seq_length=tgt.size(1)
    # Lower-triangular (diagonal included): future tokens are not visible.
    # Created on the input's device so no global ``device`` is required.
    nopeak_mask=torch.tril(torch.ones(1,seq_length,seq_length,dtype=torch.bool,device=tgt.device))
    tgt_mask=tgt_mask & nopeak_mask
    return src_mask,tgt_mask
  def forward(self,src,tgt):
    """Encode ``src``, decode ``tgt`` against it and return log-probabilities
    of shape (batch, tgt_len, tgt_vocab_size)."""
    src_mask,tgt_mask=self.generate_mask(src,tgt)
    src_embedded=self.dropout(self.positional_encoding(self.encoder_embedding(src)))
    tgt_embedded=self.dropout(self.positional_encoding(self.decoder_embedding(tgt)))
    enc_output=src_embedded
    for encoder_layer in self.encoder_layers:
      enc_output=encoder_layer(enc_output,src_mask)

    # Chain the decoder stack: each layer consumes the previous layer's
    # output. Feeding tgt_embedded to every layer would make all but the
    # last decoder layer dead weight.
    dec_output=tgt_embedded
    for decoder_layer in self.decoder_layers:
      dec_output=decoder_layer(dec_output,enc_output,src_mask,tgt_mask)

    output=torch.log_softmax(self.fc_layer(dec_output),dim=-1)

    return output


In [120]:
from datasets import load_dataset
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.trainers import WordLevelTrainer
from tokenizers.pre_tokenizers import Whitespace

# Pathlib?
from pathlib import Path

# typing
from typing import Any

# Library for progress bars in loops
from tqdm import tqdm

# Importing library of warnings
import warnings

In [121]:
import torch
import torch.nn as nn
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader, random_split
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.trainers import WordLevelTrainer
from tokenizers.pre_tokenizers import Whitespace

from pathlib import Path
import numpy as np

In [134]:
def get_all_sentences(ds,lang):
  """Lazily yield the ``lang`` text of every item's "translation" entry in ``ds``."""
  yield from (example["translation"][lang] for example in ds)
def get_or_build_tokenizer(config,ds,lang):
  """Load the word-level tokenizer for ``lang`` from disk, training and saving it first if absent.

  The file location is ``config["tokenizer_file"]`` formatted with ``lang``.
  """
  tokenizer_path=Path(config["tokenizer_file"].format(lang))

  # Fast path: a tokenizer was already saved for this language.
  if tokenizer_path.exists():
    return Tokenizer.from_file(str(tokenizer_path))

  # Otherwise train a whitespace-split word-level tokenizer on the dataset.
  tokenizer=Tokenizer(WordLevel(unk_token="[UNK]"))
  tokenizer.pre_tokenizer=Whitespace()
  trainer=WordLevelTrainer(
      special_tokens=["[UNK]","[PAD]","[SOS]","[EOS]"],
      min_frequency=2,
  )
  sentences=get_all_sentences(ds,lang)
  tokenizer.train_from_iterator(sentences,trainer=trainer)
  tokenizer.save(str(tokenizer_path))
  return tokenizer
def get_ds(config):
  """Load the opus_books pair named in ``config`` and build train/validation loaders.

  Args:
    config: dict providing "lang_src", "lang_tgt", "seq_len", "batch_size"
      and "tokenizer_file".

  Returns:
    (train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt)
  """
  ds_raw=load_dataset("opus_books", f'{config["lang_src"]}-{config["lang_tgt"]}', split="train")
  #build tokenizers
  def create_subset(dataset, sample_size):
    # Debug helper: random sample without replacement for quick experiments.
    indices = np.random.choice(len(dataset), sample_size, replace=False)
    subset = dataset.select(indices)
    return subset
  # ds_raw=create_subset(ds_raw,10)

  tokenizer_src=get_or_build_tokenizer(config,ds_raw,config["lang_src"])
  tokenizer_tgt=get_or_build_tokenizer(config,ds_raw,config["lang_tgt"])

  # 90/10 train/validation split.
  train_ds_size = int(0.9*len(ds_raw))
  val_ds_size=len(ds_raw)-train_ds_size
  train_ds_raw,val_ds_raw=random_split(ds_raw,[train_ds_size,val_ds_size])

  train_ds= BilingualDataset(train_ds_raw, tokenizer_src,tokenizer_tgt,config["lang_src"],config["lang_tgt"],config["seq_len"])
  # The validation set must wrap the held-out split, not the training split.
  val_ds= BilingualDataset(val_ds_raw, tokenizer_src,tokenizer_tgt,config["lang_src"],config["lang_tgt"],config["seq_len"])

  # Longest tokenised sentence per language, to sanity-check config["seq_len"].
  max_len_src=0
  max_len_tgt=0

  for item in ds_raw:
    src_ids=tokenizer_src.encode(item["translation"][config["lang_src"]]).ids
    tgt_ids=tokenizer_tgt.encode(item["translation"][config["lang_tgt"]]).ids
    max_len_src=max(max_len_src,len(src_ids))
    max_len_tgt=max(max_len_tgt,len(tgt_ids))
  print(f"Max length of source sentence: {max_len_src}")
  print(f"Max length of target sentence: {max_len_tgt}")

  train_dataloader=DataLoader(train_ds,batch_size=config["batch_size"],shuffle=True)
  val_dataloader=DataLoader(val_ds,batch_size=1,shuffle=True)

  return train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt





In [135]:
class BilingualDataset(Dataset):
  """Wraps a translation dataset and yields fixed-length token-id tensors.

  Each item is a dict with:
    "encoder_input": [SOS] + source ids + [EOS] + padding, length ``seq_len``
    "decoder_input": [SOS] + target ids + padding,         length ``seq_len``
    "label":         target ids + [EOS] + padding,         length ``seq_len``
    "src_text", "tgt_text": the raw strings.

  Tensors are created on the CPU; the training loop moves each batch to the
  compute device. Keeping the dataset device-agnostic avoids relying on a
  global ``device``.
  """
  def __init__(self,ds,tokenizer_src,tokenizer_tgt,src_lang,tgt_lang,seq_len):
    """
    Args:
      ds: indexable collection of items with item["translation"][lang] strings.
      tokenizer_src, tokenizer_tgt: tokenizers exposing ``encode(text).ids``
        and ``token_to_id(token)``.
      src_lang, tgt_lang: keys into item["translation"].
      seq_len: fixed length of every returned tensor.
    """
    self.ds=ds
    self.tokenizer_src=tokenizer_src
    self.tokenizer_tgt=tokenizer_tgt
    self.src_lang=src_lang
    self.tgt_lang=tgt_lang
    self.seq_len=seq_len

    # Special-token ids are looked up in the target tokenizer and reused for
    # the encoder input too.
    # NOTE(review): assumes both tokenizers map the special tokens to the
    # same ids - confirm if they are ever built with different settings.
    self.sos_token = torch.tensor([tokenizer_tgt.token_to_id("[SOS]")], dtype=torch.int64)
    self.eos_token = torch.tensor([tokenizer_tgt.token_to_id("[EOS]")], dtype=torch.int64)
    self.pad_token = torch.tensor([tokenizer_tgt.token_to_id("[PAD]")], dtype=torch.int64)

  def __len__(self):
    """Number of sentence pairs."""
    return len(self.ds)
  def __getitem__(self,index):
    """Tokenise and pad the pair at ``index``.

    Raises:
      ValueError: if either sentence does not fit into ``seq_len`` once the
        special tokens are added.
    """
    src_target_pair=self.ds[index]
    src_text=src_target_pair["translation"][self.src_lang]
    tgt_text=src_target_pair["translation"][self.tgt_lang]

    enc_input_tokens=self.tokenizer_src.encode(src_text).ids
    dec_input_tokens=self.tokenizer_tgt.encode(tgt_text).ids

    # Encoder adds both [SOS] and [EOS]; decoder input adds only [SOS]
    # (and the label only [EOS]), hence -2 versus -1.
    enc_num_padding_tokens=self.seq_len-len(enc_input_tokens)-2
    dec_num_padding_tokens=self.seq_len-len(dec_input_tokens)-1

    if enc_num_padding_tokens<0 or dec_num_padding_tokens<0:
      raise ValueError("Sentence too long")

    pad_id=int(self.pad_token.item())
    enc_ids=torch.tensor(enc_input_tokens,dtype=torch.int64)
    dec_ids=torch.tensor(dec_input_tokens,dtype=torch.int64)
    # torch.full builds the padding directly rather than from a Python list
    # of one-element tensors.
    enc_padding=torch.full((enc_num_padding_tokens,),pad_id,dtype=torch.int64)
    dec_padding=torch.full((dec_num_padding_tokens,),pad_id,dtype=torch.int64)

    encoder_input=torch.cat([self.sos_token,enc_ids,self.eos_token,enc_padding])
    decoder_input=torch.cat([self.sos_token,dec_ids,dec_padding])
    # The label is the decoder input shifted left by one: no [SOS], with [EOS] appended.
    label=torch.cat([dec_ids,self.eos_token,dec_padding])

    assert encoder_input.size(0)== self.seq_len
    assert decoder_input.size(0)== self.seq_len
    assert label.size(0)==self.seq_len

    return{
        "encoder_input":encoder_input,
        "decoder_input":decoder_input,
        'label':label,
         "src_text":src_text,
        "tgt_text":tgt_text
    }




In [136]:
from pathlib import Path
def get_config():
    """Return a fresh dict of training, data and checkpoint settings."""
    settings = dict(
        # optimisation
        batch_size=6,
        num_epochs=20,
        lr=10**-4,
        # model / data shape
        seq_len=350,
        d_model=512,
        # language pair
        lang_src="en",
        lang_tgt="it",
        # checkpoints and artefacts
        model_folder="weights",
        model_filename="tmodel_",
        preload=None,
        tokenizer_file="tokenizer_{0}.json",
        experiment_name="runs/tmodel",
    )
    return settings

def get_weights_file_path(config,epoch):
    """Return the checkpoint path ``./<model_folder>/<basename><epoch>.pt`` as a string.

    Args:
      config: dict with "model_folder" and a checkpoint basename under
        "model_filename" (the key get_config() provides); a "model_base"
        key, if present, takes precedence for backward compatibility.
      epoch: value interpolated into the file name (e.g. "03").
    """
    model_folder= config["model_folder"]
    # get_config() defines "model_filename", not "model_base"; reading only
    # "model_base" raised KeyError for the notebook's own config.
    if "model_base" in config:
        model_basename= config["model_base"]
    else:
        model_basename= config["model_filename"]
    model_filename= f"{model_basename}{epoch}.pt"
    return str (Path('.') / model_folder / model_filename)




In [137]:
# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Single source of truth for the settings shared between data and model.
config = get_config()
train_dataloader,val_dataloader,tokenizer_src,tokenizer_tgt = get_ds(config)
src_vocab_size = tokenizer_src.get_vocab_size()
tgt_vocab_size = tokenizer_tgt.get_vocab_size()
print(tgt_vocab_size)
d_model = config["d_model"]
num_heads = 8
num_layers = 6
d_ff = 2048
# The positional-encoding table must cover the padded sequence length that
# the dataset produces.
max_seq_length = config["seq_len"]
dropout = 0.1


transformer = Transformer(src_vocab_size, tgt_vocab_size, d_model, num_heads, num_layers, d_ff, max_seq_length, dropout).to(device)

# for p in transformer.parameters():
#   if p.dim()>1:
#     nn.init.xavier_uniform_(p)

# for batch in batch_iterator:
#    enc_input= batch["encoder_input"].to(device) #(B,s)
#    dec_input= batch["decoder_input"].to(device)
#   #  enc_mask=batch["encoder_mask"].to(device) #(B,1,1,s)
#   #  dec_mask= batch["decoder_mask"].to(device) #(B,1,s,s)

#   #  enc_output=model.encode(enc_input,enc_mask)
#   #  dec_output=model.decode(enc_output,enc_mask,dec_input,dec_mask)
#   #  proj_output=model.project(dec_output) #(B,S,TGT_VOCAB_SIZE)

#             label=batch["label"].to(device) #(B,s)
#             loss=loss_fn(proj_output.view(-1,tokenizer_tgt.get_vocab_size()),label.view(-1))
#             batch_iterator.set_postfix({f"loss": f"{loss.item():6.3f}"})

#             loss.backward()
#             optimizer.step()
#             optimizer.zero_grad()
#             global_step+=1
#         model_filename=get_weights_file_path(config, f"{e:02d}")
#         torch.save({
#             "epoch":e,
#             "model_state_dict":model.state_dict(),
#             "optimizer_state_dict" : optimizer.state_dict(),
#             "global_step":global_step
#         }, model_filename)

# Generate random sample data
# src_data = torch.randint(1, src_vocab_size, (6, max_seq_length))  # (batch_size, seq_length)
# tgt_data = torch.randint(1, tgt_vocab_size, (6, max_seq_length))  # (batch_size, seq_length)

22463


In [138]:


# Labels are target-language token ids, so the padding id to ignore is taken
# from the target tokenizer.
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer_tgt.token_to_id('[PAD]'),label_smoothing=0.1).to(device)
optimizer = optim.Adam(transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

transformer.train()

for epoch in range(50):
    batch_iterator=tqdm(train_dataloader, desc=f"Processing epoch {epoch:02d}")
    for batch in batch_iterator:
      optimizer.zero_grad()

      output = transformer(batch["encoder_input"].to(device), batch["decoder_input"].to(device))

      # Flatten (batch, seq, vocab) -> (batch*seq, vocab) and (batch, seq) -> (batch*seq,).
      loss = criterion(output.view(-1, tgt_vocab_size), batch["label"].view(-1).to(device))
      loss.backward()
      optimizer.step()
      # Show the running loss on the progress bar instead of printing one
      # line per batch, which floods the cell output.
      batch_iterator.set_postfix({"loss": f"{loss.item():6.3f}"})
    # Overwrite the checkpoint at the end of every epoch.
    torch.save(transformer.state_dict(),"model.pth")

Processing epoch 00:   0%|          | 1/4850 [00:00<27:17,  2.96it/s]

Epoch: 1, Loss: 10.192648887634277


Processing epoch 00:   0%|          | 2/4850 [00:00<23:39,  3.42it/s]

Epoch: 1, Loss: 10.057942390441895


Processing epoch 00:   0%|          | 3/4850 [00:00<23:07,  3.49it/s]

Epoch: 1, Loss: 10.045004844665527


Processing epoch 00:   0%|          | 4/4850 [00:01<22:59,  3.51it/s]

Epoch: 1, Loss: 9.88565731048584


Processing epoch 00:   0%|          | 5/4850 [00:01<22:49,  3.54it/s]

Epoch: 1, Loss: 9.658912658691406


Processing epoch 00:   0%|          | 6/4850 [00:01<22:38,  3.57it/s]

Epoch: 1, Loss: 9.647951126098633


Processing epoch 00:   0%|          | 7/4850 [00:02<22:49,  3.54it/s]

Epoch: 1, Loss: 9.559700012207031


Processing epoch 00:   0%|          | 8/4850 [00:02<22:48,  3.54it/s]

Epoch: 1, Loss: 9.419933319091797


Processing epoch 00:   0%|          | 9/4850 [00:02<22:50,  3.53it/s]

Epoch: 1, Loss: 9.345985412597656


Processing epoch 00:   0%|          | 10/4850 [00:02<22:55,  3.52it/s]

Epoch: 1, Loss: 9.331493377685547


Processing epoch 00:   0%|          | 11/4850 [00:03<22:48,  3.54it/s]

Epoch: 1, Loss: 9.072494506835938


Processing epoch 00:   0%|          | 12/4850 [00:03<22:21,  3.61it/s]

Epoch: 1, Loss: 9.397090911865234


Processing epoch 00:   0%|          | 13/4850 [00:03<21:58,  3.67it/s]

Epoch: 1, Loss: 9.296061515808105


Processing epoch 00:   0%|          | 14/4850 [00:03<22:00,  3.66it/s]

Epoch: 1, Loss: 8.851800918579102


Processing epoch 00:   0%|          | 15/4850 [00:04<21:49,  3.69it/s]

Epoch: 1, Loss: 8.939373970031738


Processing epoch 00:   0%|          | 16/4850 [00:04<21:35,  3.73it/s]

Epoch: 1, Loss: 8.675220489501953


Processing epoch 00:   0%|          | 17/4850 [00:04<21:29,  3.75it/s]

Epoch: 1, Loss: 8.841644287109375


Processing epoch 00:   0%|          | 18/4850 [00:04<21:26,  3.76it/s]

Epoch: 1, Loss: 8.758639335632324


Processing epoch 00:   0%|          | 19/4850 [00:05<21:18,  3.78it/s]

Epoch: 1, Loss: 8.563658714294434


Processing epoch 00:   0%|          | 20/4850 [00:05<21:14,  3.79it/s]

Epoch: 1, Loss: 8.833366394042969


Processing epoch 00:   0%|          | 21/4850 [00:05<21:13,  3.79it/s]

Epoch: 1, Loss: 8.721978187561035


Processing epoch 00:   0%|          | 22/4850 [00:06<21:17,  3.78it/s]

Epoch: 1, Loss: 8.622102737426758


Processing epoch 00:   0%|          | 23/4850 [00:06<21:16,  3.78it/s]

Epoch: 1, Loss: 8.588335990905762


Processing epoch 00:   0%|          | 24/4850 [00:06<21:22,  3.76it/s]

Epoch: 1, Loss: 8.50351619720459


Processing epoch 00:   1%|          | 25/4850 [00:06<21:22,  3.76it/s]

Epoch: 1, Loss: 8.527480125427246


Processing epoch 00:   1%|          | 26/4850 [00:07<21:21,  3.76it/s]

Epoch: 1, Loss: 8.43051528930664


Processing epoch 00:   1%|          | 27/4850 [00:07<21:17,  3.78it/s]

Epoch: 1, Loss: 8.143059730529785


Processing epoch 00:   1%|          | 28/4850 [00:07<21:15,  3.78it/s]

Epoch: 1, Loss: 8.528611183166504


Processing epoch 00:   1%|          | 29/4850 [00:07<21:11,  3.79it/s]

Epoch: 1, Loss: 8.478793144226074


Processing epoch 00:   1%|          | 30/4850 [00:08<21:12,  3.79it/s]

Epoch: 1, Loss: 8.286715507507324


Processing epoch 00:   1%|          | 31/4850 [00:08<21:15,  3.78it/s]

Epoch: 1, Loss: 8.240273475646973


Processing epoch 00:   1%|          | 32/4850 [00:08<21:12,  3.79it/s]

Epoch: 1, Loss: 8.441574096679688


Processing epoch 00:   1%|          | 33/4850 [00:08<21:13,  3.78it/s]

Epoch: 1, Loss: 8.195011138916016


Processing epoch 00:   1%|          | 34/4850 [00:09<21:12,  3.79it/s]

Epoch: 1, Loss: 8.221668243408203


Processing epoch 00:   1%|          | 35/4850 [00:09<21:10,  3.79it/s]

Epoch: 1, Loss: 7.939814567565918


Processing epoch 00:   1%|          | 36/4850 [00:09<21:11,  3.79it/s]

Epoch: 1, Loss: 8.16071891784668


Processing epoch 00:   1%|          | 37/4850 [00:10<21:18,  3.77it/s]

Epoch: 1, Loss: 7.813487529754639


Processing epoch 00:   1%|          | 38/4850 [00:10<21:18,  3.76it/s]

Epoch: 1, Loss: 7.810903549194336


Processing epoch 00:   1%|          | 39/4850 [00:10<21:21,  3.76it/s]

Epoch: 1, Loss: 7.666926383972168


Processing epoch 00:   1%|          | 40/4850 [00:10<21:18,  3.76it/s]

Epoch: 1, Loss: 7.8192830085754395


Processing epoch 00:   1%|          | 41/4850 [00:11<21:30,  3.73it/s]

Epoch: 1, Loss: 7.753388404846191


Processing epoch 00:   1%|          | 42/4850 [00:11<21:23,  3.75it/s]

Epoch: 1, Loss: 7.691771984100342


Processing epoch 00:   1%|          | 43/4850 [00:11<21:16,  3.77it/s]

Epoch: 1, Loss: 7.840769290924072


Processing epoch 00:   1%|          | 44/4850 [00:11<21:17,  3.76it/s]

Epoch: 1, Loss: 7.777296543121338


Processing epoch 00:   1%|          | 45/4850 [00:12<21:17,  3.76it/s]

Epoch: 1, Loss: 8.031612396240234


Processing epoch 00:   1%|          | 46/4850 [00:12<21:18,  3.76it/s]

Epoch: 1, Loss: 7.894691467285156


Processing epoch 00:   1%|          | 47/4850 [00:12<21:20,  3.75it/s]

Epoch: 1, Loss: 7.879481315612793


Processing epoch 00:   1%|          | 48/4850 [00:12<21:15,  3.76it/s]

Epoch: 1, Loss: 7.612637519836426


Processing epoch 00:   1%|          | 49/4850 [00:13<21:27,  3.73it/s]

Epoch: 1, Loss: 7.484038829803467


Processing epoch 00:   1%|          | 50/4850 [00:13<21:52,  3.66it/s]

Epoch: 1, Loss: 7.562959671020508


Processing epoch 00:   1%|          | 51/4850 [00:13<22:01,  3.63it/s]

Epoch: 1, Loss: 7.660431385040283


Processing epoch 00:   1%|          | 52/4850 [00:14<22:09,  3.61it/s]

Epoch: 1, Loss: 7.739768981933594


Processing epoch 00:   1%|          | 53/4850 [00:14<22:15,  3.59it/s]

Epoch: 1, Loss: 7.5685834884643555


Processing epoch 00:   1%|          | 54/4850 [00:14<22:21,  3.58it/s]

Epoch: 1, Loss: 7.394236087799072


Processing epoch 00:   1%|          | 55/4850 [00:14<22:44,  3.51it/s]

Epoch: 1, Loss: 7.6236186027526855


Processing epoch 00:   1%|          | 56/4850 [00:15<22:56,  3.48it/s]

Epoch: 1, Loss: 7.574481010437012


Processing epoch 00:   1%|          | 57/4850 [00:15<22:53,  3.49it/s]

Epoch: 1, Loss: 7.497286796569824


Processing epoch 00:   1%|          | 58/4850 [00:15<22:38,  3.53it/s]

Epoch: 1, Loss: 7.5351433753967285


Processing epoch 00:   1%|          | 59/4850 [00:16<22:21,  3.57it/s]

Epoch: 1, Loss: 7.343967914581299


Processing epoch 00:   1%|          | 60/4850 [00:16<22:05,  3.61it/s]

Epoch: 1, Loss: 7.598684787750244


Processing epoch 00:   1%|▏         | 61/4850 [00:16<21:48,  3.66it/s]

Epoch: 1, Loss: 7.664877891540527


Processing epoch 00:   1%|▏         | 62/4850 [00:16<21:38,  3.69it/s]

Epoch: 1, Loss: 7.649345874786377


Processing epoch 00:   1%|▏         | 63/4850 [00:17<21:30,  3.71it/s]

Epoch: 1, Loss: 7.29348087310791


Processing epoch 00:   1%|▏         | 64/4850 [00:17<21:22,  3.73it/s]

Epoch: 1, Loss: 7.6203203201293945


Processing epoch 00:   1%|▏         | 65/4850 [00:17<21:16,  3.75it/s]

Epoch: 1, Loss: 7.2057204246521


Processing epoch 00:   1%|▏         | 66/4850 [00:17<21:13,  3.76it/s]

Epoch: 1, Loss: 7.183619976043701


Processing epoch 00:   1%|▏         | 67/4850 [00:18<21:13,  3.76it/s]

Epoch: 1, Loss: 7.490080833435059


Processing epoch 00:   1%|▏         | 68/4850 [00:18<21:06,  3.77it/s]

Epoch: 1, Loss: 7.572448253631592


Processing epoch 00:   1%|▏         | 69/4850 [00:18<21:07,  3.77it/s]

Epoch: 1, Loss: 7.386894226074219


Processing epoch 00:   1%|▏         | 70/4850 [00:18<21:02,  3.78it/s]

Epoch: 1, Loss: 7.665482044219971


Processing epoch 00:   1%|▏         | 71/4850 [00:19<21:06,  3.77it/s]

Epoch: 1, Loss: 7.384988784790039


Processing epoch 00:   1%|▏         | 72/4850 [00:19<21:06,  3.77it/s]

Epoch: 1, Loss: 7.038545608520508


Processing epoch 00:   2%|▏         | 73/4850 [00:19<21:09,  3.76it/s]

Epoch: 1, Loss: 7.4392924308776855


Processing epoch 00:   2%|▏         | 74/4850 [00:20<21:09,  3.76it/s]

Epoch: 1, Loss: 7.242259502410889


Processing epoch 00:   2%|▏         | 75/4850 [00:20<21:13,  3.75it/s]

Epoch: 1, Loss: 7.166160583496094


Processing epoch 00:   2%|▏         | 76/4850 [00:20<21:14,  3.74it/s]

Epoch: 1, Loss: 7.805935859680176


Processing epoch 00:   2%|▏         | 77/4850 [00:20<21:18,  3.73it/s]

Epoch: 1, Loss: 7.297975540161133


Processing epoch 00:   2%|▏         | 78/4850 [00:21<21:23,  3.72it/s]

Epoch: 1, Loss: 7.611658096313477


Processing epoch 00:   2%|▏         | 79/4850 [00:21<21:28,  3.70it/s]

Epoch: 1, Loss: 7.239284515380859


Processing epoch 00:   2%|▏         | 80/4850 [00:21<21:26,  3.71it/s]

Epoch: 1, Loss: 7.006853103637695


Processing epoch 00:   2%|▏         | 81/4850 [00:21<21:26,  3.71it/s]

Epoch: 1, Loss: 7.58055305480957


Processing epoch 00:   2%|▏         | 82/4850 [00:22<21:25,  3.71it/s]

Epoch: 1, Loss: 7.150156497955322


Processing epoch 00:   2%|▏         | 83/4850 [00:22<21:21,  3.72it/s]

Epoch: 1, Loss: 7.26239013671875


Processing epoch 00:   2%|▏         | 84/4850 [00:22<21:17,  3.73it/s]

Epoch: 1, Loss: 7.249406337738037


Processing epoch 00:   2%|▏         | 85/4850 [00:22<21:20,  3.72it/s]

Epoch: 1, Loss: 6.942783355712891


Processing epoch 00:   2%|▏         | 86/4850 [00:23<21:36,  3.67it/s]

Epoch: 1, Loss: 7.21127986907959


Processing epoch 00:   2%|▏         | 87/4850 [00:23<21:37,  3.67it/s]

Epoch: 1, Loss: 7.29755973815918


Processing epoch 00:   2%|▏         | 88/4850 [00:23<21:30,  3.69it/s]

Epoch: 1, Loss: 7.5436859130859375


Processing epoch 00:   2%|▏         | 89/4850 [00:24<21:30,  3.69it/s]

Epoch: 1, Loss: 7.204287052154541


Processing epoch 00:   2%|▏         | 90/4850 [00:24<21:33,  3.68it/s]

Epoch: 1, Loss: 6.7169084548950195


Processing epoch 00:   2%|▏         | 91/4850 [00:24<21:32,  3.68it/s]

Epoch: 1, Loss: 7.014886856079102


Processing epoch 00:   2%|▏         | 92/4850 [00:24<21:37,  3.67it/s]

Epoch: 1, Loss: 7.536462783813477


Processing epoch 00:   2%|▏         | 93/4850 [00:25<21:32,  3.68it/s]

Epoch: 1, Loss: 6.9684367179870605


Processing epoch 00:   2%|▏         | 94/4850 [00:25<21:40,  3.66it/s]

Epoch: 1, Loss: 7.04840087890625


Processing epoch 00:   2%|▏         | 95/4850 [00:25<21:35,  3.67it/s]

Epoch: 1, Loss: 7.343673229217529


Processing epoch 00:   2%|▏         | 96/4850 [00:26<21:55,  3.61it/s]

Epoch: 1, Loss: 7.306282043457031


Processing epoch 00:   2%|▏         | 97/4850 [00:26<22:06,  3.58it/s]

Epoch: 1, Loss: 7.232959747314453


Processing epoch 00:   2%|▏         | 98/4850 [00:26<22:22,  3.54it/s]

Epoch: 1, Loss: 7.310522079467773


Processing epoch 00:   2%|▏         | 99/4850 [00:26<22:27,  3.53it/s]

Epoch: 1, Loss: 7.427518367767334


Processing epoch 00:   2%|▏         | 100/4850 [00:27<22:34,  3.51it/s]

Epoch: 1, Loss: 7.426396369934082


Processing epoch 00:   2%|▏         | 101/4850 [00:27<22:40,  3.49it/s]

Epoch: 1, Loss: 7.130452632904053


Processing epoch 00:   2%|▏         | 102/4850 [00:27<22:41,  3.49it/s]

Epoch: 1, Loss: 7.169071197509766


Processing epoch 00:   2%|▏         | 103/4850 [00:28<22:43,  3.48it/s]

Epoch: 1, Loss: 7.469249248504639


Processing epoch 00:   2%|▏         | 104/4850 [00:28<22:47,  3.47it/s]

Epoch: 1, Loss: 7.37531852722168


Processing epoch 00:   2%|▏         | 105/4850 [00:28<22:33,  3.51it/s]

Epoch: 1, Loss: 7.045212268829346


Processing epoch 00:   2%|▏         | 106/4850 [00:28<22:07,  3.57it/s]

Epoch: 1, Loss: 7.388391017913818


Processing epoch 00:   2%|▏         | 107/4850 [00:29<21:59,  3.59it/s]

Epoch: 1, Loss: 7.114217281341553


Processing epoch 00:   2%|▏         | 108/4850 [00:29<21:48,  3.62it/s]

Epoch: 1, Loss: 7.251265525817871


Processing epoch 00:   2%|▏         | 109/4850 [00:29<21:37,  3.65it/s]

Epoch: 1, Loss: 7.298006057739258


Processing epoch 00:   2%|▏         | 110/4850 [00:29<21:30,  3.67it/s]

Epoch: 1, Loss: 7.0457868576049805


Processing epoch 00:   2%|▏         | 111/4850 [00:30<21:29,  3.68it/s]

Epoch: 1, Loss: 7.251752853393555


Processing epoch 00:   2%|▏         | 112/4850 [00:30<21:27,  3.68it/s]

Epoch: 1, Loss: 6.979265213012695


Processing epoch 00:   2%|▏         | 113/4850 [00:30<21:25,  3.68it/s]

Epoch: 1, Loss: 7.278571128845215


Processing epoch 00:   2%|▏         | 114/4850 [00:31<21:27,  3.68it/s]

Epoch: 1, Loss: 7.229833126068115


Processing epoch 00:   2%|▏         | 115/4850 [00:31<21:25,  3.68it/s]

Epoch: 1, Loss: 7.339005947113037


Processing epoch 00:   2%|▏         | 116/4850 [00:31<21:25,  3.68it/s]

Epoch: 1, Loss: 7.40978479385376


Processing epoch 00:   2%|▏         | 117/4850 [00:31<21:25,  3.68it/s]

Epoch: 1, Loss: 7.2329912185668945


Processing epoch 00:   2%|▏         | 118/4850 [00:32<21:32,  3.66it/s]

Epoch: 1, Loss: 6.8640360832214355


Processing epoch 00:   2%|▏         | 119/4850 [00:32<21:29,  3.67it/s]

Epoch: 1, Loss: 7.428400039672852


Processing epoch 00:   2%|▏         | 120/4850 [00:32<21:36,  3.65it/s]

Epoch: 1, Loss: 7.378573894500732


Processing epoch 00:   2%|▏         | 121/4850 [00:32<21:30,  3.66it/s]

Epoch: 1, Loss: 7.48638391494751


Processing epoch 00:   3%|▎         | 122/4850 [00:33<21:25,  3.68it/s]

Epoch: 1, Loss: 6.9596147537231445


Processing epoch 00:   3%|▎         | 123/4850 [00:33<21:21,  3.69it/s]

Epoch: 1, Loss: 7.19236946105957


Processing epoch 00:   3%|▎         | 124/4850 [00:33<21:23,  3.68it/s]

Epoch: 1, Loss: 7.35268497467041


Processing epoch 00:   3%|▎         | 125/4850 [00:34<21:22,  3.69it/s]

Epoch: 1, Loss: 6.969875335693359


Processing epoch 00:   3%|▎         | 126/4850 [00:34<21:18,  3.69it/s]

Epoch: 1, Loss: 7.252747058868408


Processing epoch 00:   3%|▎         | 127/4850 [00:34<21:17,  3.70it/s]

Epoch: 1, Loss: 7.279244899749756


Processing epoch 00:   3%|▎         | 128/4850 [00:34<21:14,  3.70it/s]

Epoch: 1, Loss: 7.051374435424805


Processing epoch 00:   3%|▎         | 129/4850 [00:35<21:19,  3.69it/s]

Epoch: 1, Loss: 7.346218109130859


Processing epoch 00:   3%|▎         | 130/4850 [00:35<21:17,  3.69it/s]

Epoch: 1, Loss: 7.245000839233398


Processing epoch 00:   3%|▎         | 131/4850 [00:35<21:13,  3.70it/s]

Epoch: 1, Loss: 6.882325172424316


Processing epoch 00:   3%|▎         | 132/4850 [00:35<21:10,  3.71it/s]

Epoch: 1, Loss: 7.493710517883301


Processing epoch 00:   3%|▎         | 133/4850 [00:36<21:09,  3.72it/s]

Epoch: 1, Loss: 6.873601913452148


Processing epoch 00:   3%|▎         | 134/4850 [00:36<21:06,  3.72it/s]

Epoch: 1, Loss: 6.811888217926025


Processing epoch 00:   3%|▎         | 135/4850 [00:36<21:10,  3.71it/s]

Epoch: 1, Loss: 7.090500831604004


Processing epoch 00:   3%|▎         | 136/4850 [00:36<21:11,  3.71it/s]

Epoch: 1, Loss: 7.120522975921631


Processing epoch 00:   3%|▎         | 137/4850 [00:37<21:12,  3.70it/s]

Epoch: 1, Loss: 7.122215270996094


Processing epoch 00:   3%|▎         | 138/4850 [00:37<21:15,  3.70it/s]

Epoch: 1, Loss: 7.39932918548584


Processing epoch 00:   3%|▎         | 139/4850 [00:37<21:35,  3.64it/s]

Epoch: 1, Loss: 6.955165386199951


Processing epoch 00:   3%|▎         | 140/4850 [00:38<21:26,  3.66it/s]

Epoch: 1, Loss: 7.0485992431640625


Processing epoch 00:   3%|▎         | 141/4850 [00:38<21:39,  3.62it/s]

Epoch: 1, Loss: 6.66623067855835


Processing epoch 00:   3%|▎         | 142/4850 [00:38<21:53,  3.58it/s]

Epoch: 1, Loss: 6.92849063873291


Processing epoch 00:   3%|▎         | 143/4850 [00:38<22:10,  3.54it/s]

Epoch: 1, Loss: 7.121799945831299


Processing epoch 00:   3%|▎         | 144/4850 [00:39<22:22,  3.51it/s]

Epoch: 1, Loss: 7.0536298751831055


Processing epoch 00:   3%|▎         | 145/4850 [00:39<22:18,  3.52it/s]

Epoch: 1, Loss: 7.1752471923828125


Processing epoch 00:   3%|▎         | 146/4850 [00:39<22:25,  3.50it/s]

Epoch: 1, Loss: 6.861243724822998


Processing epoch 00:   3%|▎         | 147/4850 [00:40<22:36,  3.47it/s]

Epoch: 1, Loss: 6.946854114532471


Processing epoch 00:   3%|▎         | 148/4850 [00:40<22:33,  3.47it/s]

Epoch: 1, Loss: 7.0964484214782715


Processing epoch 00:   3%|▎         | 149/4850 [00:40<22:29,  3.48it/s]

Epoch: 1, Loss: 6.869383335113525


Processing epoch 00:   3%|▎         | 150/4850 [00:40<22:12,  3.53it/s]

Epoch: 1, Loss: 7.168990612030029


Processing epoch 00:   3%|▎         | 151/4850 [00:41<21:52,  3.58it/s]

Epoch: 1, Loss: 7.060680389404297


Processing epoch 00:   3%|▎         | 152/4850 [00:41<21:33,  3.63it/s]

Epoch: 1, Loss: 7.321810245513916


Processing epoch 00:   3%|▎         | 153/4850 [00:41<21:23,  3.66it/s]

Epoch: 1, Loss: 7.362762928009033


Processing epoch 00:   3%|▎         | 154/4850 [00:42<21:17,  3.68it/s]

Epoch: 1, Loss: 7.2234673500061035


Processing epoch 00:   3%|▎         | 155/4850 [00:42<21:11,  3.69it/s]

Epoch: 1, Loss: 7.152405261993408


Processing epoch 00:   3%|▎         | 156/4850 [00:42<21:17,  3.67it/s]

Epoch: 1, Loss: 6.681962013244629


Processing epoch 00:   3%|▎         | 157/4850 [00:42<21:10,  3.69it/s]

Epoch: 1, Loss: 7.037688732147217


Processing epoch 00:   3%|▎         | 158/4850 [00:43<21:06,  3.70it/s]

Epoch: 1, Loss: 6.9838738441467285


Processing epoch 00:   3%|▎         | 159/4850 [00:43<21:04,  3.71it/s]

Epoch: 1, Loss: 6.885560035705566


Processing epoch 00:   3%|▎         | 160/4850 [00:43<21:02,  3.71it/s]

Epoch: 1, Loss: 7.0984930992126465


Processing epoch 00:   3%|▎         | 161/4850 [00:43<20:58,  3.73it/s]

Epoch: 1, Loss: 7.008552074432373


Processing epoch 00:   3%|▎         | 162/4850 [00:44<20:58,  3.73it/s]

Epoch: 1, Loss: 7.040389060974121


Processing epoch 00:   3%|▎         | 163/4850 [00:44<21:09,  3.69it/s]

Epoch: 1, Loss: 7.25909948348999


Processing epoch 00:   3%|▎         | 164/4850 [00:44<21:02,  3.71it/s]

Epoch: 1, Loss: 6.800551414489746


Processing epoch 00:   3%|▎         | 165/4850 [00:44<21:06,  3.70it/s]

Epoch: 1, Loss: 7.505126476287842


Processing epoch 00:   3%|▎         | 166/4850 [00:45<21:08,  3.69it/s]

Epoch: 1, Loss: 7.50282621383667


Processing epoch 00:   3%|▎         | 167/4850 [00:45<21:05,  3.70it/s]

Epoch: 1, Loss: 7.001698017120361


Processing epoch 00:   3%|▎         | 168/4850 [00:45<21:04,  3.70it/s]

Epoch: 1, Loss: 6.781984329223633


Processing epoch 00:   3%|▎         | 169/4850 [00:46<21:07,  3.69it/s]

Epoch: 1, Loss: 7.498358249664307


Processing epoch 00:   4%|▎         | 170/4850 [00:46<21:05,  3.70it/s]

Epoch: 1, Loss: 7.019119739532471


Processing epoch 00:   4%|▎         | 171/4850 [00:46<20:55,  3.73it/s]

Epoch: 1, Loss: 6.857014179229736


Processing epoch 00:   4%|▎         | 172/4850 [00:46<20:47,  3.75it/s]

Epoch: 1, Loss: 6.893717288970947


Processing epoch 00:   4%|▎         | 173/4850 [00:47<20:42,  3.76it/s]

Epoch: 1, Loss: 7.14776086807251


Processing epoch 00:   4%|▎         | 174/4850 [00:47<20:42,  3.76it/s]

Epoch: 1, Loss: 6.908448696136475


Processing epoch 00:   4%|▎         | 175/4850 [00:47<20:40,  3.77it/s]

Epoch: 1, Loss: 7.439417839050293


Processing epoch 00:   4%|▎         | 176/4850 [00:47<20:45,  3.75it/s]

Epoch: 1, Loss: 7.044882774353027


Processing epoch 00:   4%|▎         | 177/4850 [00:48<20:45,  3.75it/s]

Epoch: 1, Loss: 7.002374172210693


Processing epoch 00:   4%|▎         | 178/4850 [00:48<20:46,  3.75it/s]

Epoch: 1, Loss: 6.973525524139404


Processing epoch 00:   4%|▎         | 179/4850 [00:48<20:42,  3.76it/s]

Epoch: 1, Loss: 7.051778316497803


Processing epoch 00:   4%|▎         | 180/4850 [00:48<20:37,  3.77it/s]

Epoch: 1, Loss: 7.0431060791015625


Processing epoch 00:   4%|▎         | 181/4850 [00:49<20:29,  3.80it/s]

Epoch: 1, Loss: 7.096217632293701


Processing epoch 00:   4%|▍         | 182/4850 [00:49<20:29,  3.80it/s]

Epoch: 1, Loss: 7.285880088806152


Processing epoch 00:   4%|▍         | 183/4850 [00:49<20:29,  3.80it/s]

Epoch: 1, Loss: 6.721405982971191


Processing epoch 00:   4%|▍         | 184/4850 [00:50<20:30,  3.79it/s]

Epoch: 1, Loss: 6.999978065490723


Processing epoch 00:   4%|▍         | 185/4850 [00:50<20:28,  3.80it/s]

Epoch: 1, Loss: 7.060935020446777


Processing epoch 00:   4%|▍         | 186/4850 [00:50<20:32,  3.78it/s]

Epoch: 1, Loss: 7.126310348510742


Processing epoch 00:   4%|▍         | 187/4850 [00:50<20:34,  3.78it/s]

Epoch: 1, Loss: 6.32005500793457


Processing epoch 00:   4%|▍         | 188/4850 [00:51<21:12,  3.66it/s]

Epoch: 1, Loss: 7.09503698348999


Processing epoch 00:   4%|▍         | 189/4850 [00:51<21:25,  3.63it/s]

Epoch: 1, Loss: 7.076112747192383


Processing epoch 00:   4%|▍         | 190/4850 [00:51<21:36,  3.59it/s]

Epoch: 1, Loss: 7.336305141448975


Processing epoch 00:   4%|▍         | 191/4850 [00:51<21:46,  3.57it/s]

Epoch: 1, Loss: 7.071622848510742


Processing epoch 00:   4%|▍         | 192/4850 [00:52<21:55,  3.54it/s]

Epoch: 1, Loss: 6.989117622375488


Processing epoch 00:   4%|▍         | 193/4850 [00:52<22:01,  3.52it/s]

Epoch: 1, Loss: 7.237937927246094


Processing epoch 00:   4%|▍         | 194/4850 [00:52<22:04,  3.51it/s]

Epoch: 1, Loss: 6.707308769226074


Processing epoch 00:   4%|▍         | 195/4850 [00:53<21:46,  3.56it/s]

Epoch: 1, Loss: 6.780998706817627


Processing epoch 00:   4%|▍         | 196/4850 [00:53<21:57,  3.53it/s]

Epoch: 1, Loss: 7.46219539642334


Processing epoch 00:   4%|▍         | 197/4850 [00:53<21:33,  3.60it/s]

Epoch: 1, Loss: 7.106417179107666


Processing epoch 00:   4%|▍         | 198/4850 [00:53<21:13,  3.65it/s]

Epoch: 1, Loss: 7.077313423156738


Processing epoch 00:   4%|▍         | 199/4850 [00:54<21:05,  3.67it/s]

Epoch: 1, Loss: 7.0683722496032715


Processing epoch 00:   4%|▍         | 200/4850 [00:54<20:56,  3.70it/s]

Epoch: 1, Loss: 7.114382266998291


Processing epoch 00:   4%|▍         | 201/4850 [00:54<20:50,  3.72it/s]

Epoch: 1, Loss: 7.058751583099365


Processing epoch 00:   4%|▍         | 202/4850 [00:54<20:47,  3.73it/s]

Epoch: 1, Loss: 6.905307292938232


Processing epoch 00:   4%|▍         | 203/4850 [00:55<20:44,  3.73it/s]

Epoch: 1, Loss: 7.001152992248535


Processing epoch 00:   4%|▍         | 204/4850 [00:55<20:38,  3.75it/s]

Epoch: 1, Loss: 6.87741756439209


Processing epoch 00:   4%|▍         | 205/4850 [00:55<20:33,  3.77it/s]

Epoch: 1, Loss: 6.896303176879883


Processing epoch 00:   4%|▍         | 206/4850 [00:56<20:31,  3.77it/s]

Epoch: 1, Loss: 6.908170223236084


Processing epoch 00:   4%|▍         | 207/4850 [00:56<20:38,  3.75it/s]

Epoch: 1, Loss: 7.323903560638428


Processing epoch 00:   4%|▍         | 208/4850 [00:56<20:38,  3.75it/s]

Epoch: 1, Loss: 6.867839336395264


Processing epoch 00:   4%|▍         | 209/4850 [00:56<20:36,  3.75it/s]

Epoch: 1, Loss: 7.050274848937988


Processing epoch 00:   4%|▍         | 210/4850 [00:57<20:35,  3.75it/s]

Epoch: 1, Loss: 7.258784294128418


Processing epoch 00:   4%|▍         | 211/4850 [00:57<20:43,  3.73it/s]

Epoch: 1, Loss: 7.042245388031006


Processing epoch 00:   4%|▍         | 212/4850 [00:57<20:41,  3.74it/s]

Epoch: 1, Loss: 6.687490940093994


Processing epoch 00:   4%|▍         | 213/4850 [00:57<20:40,  3.74it/s]

Epoch: 1, Loss: 6.88972282409668


Processing epoch 00:   4%|▍         | 214/4850 [00:58<20:35,  3.75it/s]

Epoch: 1, Loss: 7.155577659606934


Processing epoch 00:   4%|▍         | 215/4850 [00:58<20:34,  3.75it/s]

Epoch: 1, Loss: 6.819056987762451


Processing epoch 00:   4%|▍         | 216/4850 [00:58<20:30,  3.77it/s]

Epoch: 1, Loss: 7.1134772300720215


Processing epoch 00:   4%|▍         | 217/4850 [00:58<20:27,  3.78it/s]

Epoch: 1, Loss: 6.965171813964844


Processing epoch 00:   4%|▍         | 218/4850 [00:59<20:24,  3.78it/s]

Epoch: 1, Loss: 6.967297554016113


Processing epoch 00:   5%|▍         | 219/4850 [00:59<20:21,  3.79it/s]

Epoch: 1, Loss: 6.843472003936768


Processing epoch 00:   5%|▍         | 220/4850 [00:59<20:20,  3.79it/s]

Epoch: 1, Loss: 7.071112155914307


Processing epoch 00:   5%|▍         | 221/4850 [01:00<20:20,  3.79it/s]

Epoch: 1, Loss: 7.155099868774414


Processing epoch 00:   5%|▍         | 222/4850 [01:00<20:24,  3.78it/s]

Epoch: 1, Loss: 6.661444187164307


Processing epoch 00:   5%|▍         | 223/4850 [01:00<20:27,  3.77it/s]

Epoch: 1, Loss: 7.080120086669922


Processing epoch 00:   5%|▍         | 224/4850 [01:00<20:25,  3.77it/s]

Epoch: 1, Loss: 7.344207286834717


Processing epoch 00:   5%|▍         | 225/4850 [01:01<20:25,  3.77it/s]

Epoch: 1, Loss: 7.096875190734863


Processing epoch 00:   5%|▍         | 226/4850 [01:01<20:23,  3.78it/s]

Epoch: 1, Loss: 7.020228385925293


Processing epoch 00:   5%|▍         | 227/4850 [01:01<20:21,  3.78it/s]

Epoch: 1, Loss: 6.702298641204834


Processing epoch 00:   5%|▍         | 228/4850 [01:01<20:23,  3.78it/s]

Epoch: 1, Loss: 6.7856011390686035


Processing epoch 00:   5%|▍         | 229/4850 [01:02<20:26,  3.77it/s]

Epoch: 1, Loss: 6.60706901550293


Processing epoch 00:   5%|▍         | 230/4850 [01:02<20:23,  3.78it/s]

Epoch: 1, Loss: 7.002740859985352


Processing epoch 00:   5%|▍         | 231/4850 [01:02<20:20,  3.78it/s]

Epoch: 1, Loss: 7.081856727600098


Processing epoch 00:   5%|▍         | 232/4850 [01:02<20:19,  3.79it/s]

Epoch: 1, Loss: 6.211476802825928


Processing epoch 00:   5%|▍         | 233/4850 [01:03<20:20,  3.78it/s]

Epoch: 1, Loss: 6.633913040161133


Processing epoch 00:   5%|▍         | 234/4850 [01:03<20:44,  3.71it/s]

Epoch: 1, Loss: 7.328622817993164


Processing epoch 00:   5%|▍         | 235/4850 [01:03<20:57,  3.67it/s]

Epoch: 1, Loss: 6.928960800170898


Processing epoch 00:   5%|▍         | 236/4850 [01:04<21:03,  3.65it/s]

Epoch: 1, Loss: 7.189303874969482


Processing epoch 00:   5%|▍         | 237/4850 [01:04<21:19,  3.60it/s]

Epoch: 1, Loss: 7.065796852111816


Processing epoch 00:   5%|▍         | 238/4850 [01:04<21:18,  3.61it/s]

Epoch: 1, Loss: 6.771610260009766


Processing epoch 00:   5%|▍         | 239/4850 [01:04<21:24,  3.59it/s]

Epoch: 1, Loss: 6.843141555786133


Processing epoch 00:   5%|▍         | 240/4850 [01:05<21:32,  3.57it/s]

Epoch: 1, Loss: 6.896588325500488


Processing epoch 00:   5%|▍         | 241/4850 [01:05<21:31,  3.57it/s]

Epoch: 1, Loss: 7.03605318069458


Processing epoch 00:   5%|▍         | 242/4850 [01:05<21:31,  3.57it/s]

Epoch: 1, Loss: 6.551398277282715


Processing epoch 00:   5%|▌         | 243/4850 [01:06<21:38,  3.55it/s]

Epoch: 1, Loss: 6.979889869689941


Processing epoch 00:   5%|▌         | 244/4850 [01:06<21:16,  3.61it/s]

Epoch: 1, Loss: 6.96573543548584


Processing epoch 00:   5%|▌         | 245/4850 [01:06<20:57,  3.66it/s]

Epoch: 1, Loss: 7.062861442565918


Processing epoch 00:   5%|▌         | 246/4850 [01:06<20:41,  3.71it/s]

Epoch: 1, Loss: 6.805937767028809


Processing epoch 00:   5%|▌         | 247/4850 [01:07<20:31,  3.74it/s]

Epoch: 1, Loss: 7.022373199462891


Processing epoch 00:   5%|▌         | 248/4850 [01:07<20:29,  3.74it/s]

Epoch: 1, Loss: 6.94161319732666


Processing epoch 00:   5%|▌         | 249/4850 [01:07<20:25,  3.75it/s]

Epoch: 1, Loss: 6.887786865234375


Processing epoch 00:   5%|▌         | 250/4850 [01:07<20:21,  3.76it/s]

Epoch: 1, Loss: 6.802279949188232


Processing epoch 00:   5%|▌         | 251/4850 [01:08<20:21,  3.77it/s]

Epoch: 1, Loss: 6.679498195648193


Processing epoch 00:   5%|▌         | 252/4850 [01:08<20:16,  3.78it/s]

Epoch: 1, Loss: 6.921620845794678


Processing epoch 00:   5%|▌         | 253/4850 [01:08<20:16,  3.78it/s]

Epoch: 1, Loss: 7.091116428375244


Processing epoch 00:   5%|▌         | 254/4850 [01:08<20:16,  3.78it/s]

Epoch: 1, Loss: 6.6342620849609375


Processing epoch 00:   5%|▌         | 255/4850 [01:09<20:18,  3.77it/s]

Epoch: 1, Loss: 6.567440986633301


Processing epoch 00:   5%|▌         | 256/4850 [01:09<20:16,  3.78it/s]

Epoch: 1, Loss: 6.896462440490723


Processing epoch 00:   5%|▌         | 257/4850 [01:09<20:25,  3.75it/s]

Epoch: 1, Loss: 6.762514114379883


Processing epoch 00:   5%|▌         | 258/4850 [01:10<20:19,  3.77it/s]

Epoch: 1, Loss: 6.429868698120117


Processing epoch 00:   5%|▌         | 259/4850 [01:10<20:13,  3.78it/s]

Epoch: 1, Loss: 6.754241466522217


Processing epoch 00:   5%|▌         | 260/4850 [01:10<20:14,  3.78it/s]

Epoch: 1, Loss: 6.358407020568848


Processing epoch 00:   5%|▌         | 261/4850 [01:10<20:10,  3.79it/s]

Epoch: 1, Loss: 6.945995807647705


Processing epoch 00:   5%|▌         | 262/4850 [01:11<20:06,  3.80it/s]

Epoch: 1, Loss: 6.706027030944824


Processing epoch 00:   5%|▌         | 263/4850 [01:11<20:10,  3.79it/s]

Epoch: 1, Loss: 6.515164852142334


Processing epoch 00:   5%|▌         | 264/4850 [01:11<20:11,  3.78it/s]

Epoch: 1, Loss: 6.665679931640625


Processing epoch 00:   5%|▌         | 265/4850 [01:11<20:10,  3.79it/s]

Epoch: 1, Loss: 6.6878461837768555


Processing epoch 00:   5%|▌         | 266/4850 [01:12<20:10,  3.79it/s]

Epoch: 1, Loss: 6.484087944030762


Processing epoch 00:   6%|▌         | 267/4850 [01:12<20:04,  3.80it/s]

Epoch: 1, Loss: 7.082409381866455


Processing epoch 00:   6%|▌         | 268/4850 [01:12<20:03,  3.81it/s]

Epoch: 1, Loss: 6.991240501403809


Processing epoch 00:   6%|▌         | 269/4850 [01:12<20:04,  3.80it/s]

Epoch: 1, Loss: 6.715080261230469


Processing epoch 00:   6%|▌         | 270/4850 [01:13<20:01,  3.81it/s]

Epoch: 1, Loss: 7.05116081237793


Processing epoch 00:   6%|▌         | 271/4850 [01:13<20:03,  3.80it/s]

Epoch: 1, Loss: 6.77937126159668


Processing epoch 00:   6%|▌         | 272/4850 [01:13<20:04,  3.80it/s]

Epoch: 1, Loss: 6.235877513885498


Processing epoch 00:   6%|▌         | 273/4850 [01:13<20:05,  3.80it/s]

Epoch: 1, Loss: 6.461462497711182


Processing epoch 00:   6%|▌         | 274/4850 [01:14<20:08,  3.79it/s]

Epoch: 1, Loss: 6.565688610076904


Processing epoch 00:   6%|▌         | 275/4850 [01:14<20:04,  3.80it/s]

Epoch: 1, Loss: 6.723870754241943


Processing epoch 00:   6%|▌         | 276/4850 [01:14<20:06,  3.79it/s]

Epoch: 1, Loss: 6.761914253234863


Processing epoch 00:   6%|▌         | 277/4850 [01:15<20:18,  3.75it/s]

Epoch: 1, Loss: 6.727010250091553


Processing epoch 00:   6%|▌         | 278/4850 [01:15<20:14,  3.76it/s]

Epoch: 1, Loss: 6.522319793701172


Processing epoch 00:   6%|▌         | 279/4850 [01:15<20:17,  3.75it/s]

Epoch: 1, Loss: 6.20637321472168


Processing epoch 00:   6%|▌         | 280/4850 [01:15<20:10,  3.78it/s]

Epoch: 1, Loss: 7.144543170928955


Processing epoch 00:   6%|▌         | 281/4850 [01:16<20:24,  3.73it/s]

Epoch: 1, Loss: 7.054447650909424


Processing epoch 00:   6%|▌         | 282/4850 [01:16<20:51,  3.65it/s]

Epoch: 1, Loss: 6.624386310577393


Processing epoch 00:   6%|▌         | 283/4850 [01:16<21:03,  3.61it/s]

Epoch: 1, Loss: 6.744924068450928


Processing epoch 00:   6%|▌         | 284/4850 [01:16<21:24,  3.55it/s]

Epoch: 1, Loss: 6.515441417694092


Processing epoch 00:   6%|▌         | 285/4850 [01:17<21:35,  3.52it/s]

Epoch: 1, Loss: 6.42175817489624


Processing epoch 00:   6%|▌         | 286/4850 [01:17<21:50,  3.48it/s]

Epoch: 1, Loss: 6.5780229568481445


Processing epoch 00:   6%|▌         | 287/4850 [01:17<21:54,  3.47it/s]

Epoch: 1, Loss: 6.957579135894775


Processing epoch 00:   6%|▌         | 288/4850 [01:18<21:46,  3.49it/s]

Epoch: 1, Loss: 6.996196746826172


Processing epoch 00:   6%|▌         | 289/4850 [01:18<21:41,  3.51it/s]

Epoch: 1, Loss: 6.608578205108643


Processing epoch 00:   6%|▌         | 290/4850 [01:18<21:39,  3.51it/s]

Epoch: 1, Loss: 7.060542106628418


Processing epoch 00:   6%|▌         | 291/4850 [01:18<21:13,  3.58it/s]

Epoch: 1, Loss: 6.880061149597168


Processing epoch 00:   6%|▌         | 292/4850 [01:19<20:53,  3.64it/s]

Epoch: 1, Loss: 6.959205627441406


Processing epoch 00:   6%|▌         | 293/4850 [01:19<20:36,  3.68it/s]

Epoch: 1, Loss: 6.34487247467041


Processing epoch 00:   6%|▌         | 294/4850 [01:19<20:25,  3.72it/s]

Epoch: 1, Loss: 6.4987101554870605


Processing epoch 00:   6%|▌         | 295/4850 [01:19<20:27,  3.71it/s]

Epoch: 1, Loss: 6.319791793823242


Processing epoch 00:   6%|▌         | 296/4850 [01:20<20:21,  3.73it/s]

Epoch: 1, Loss: 6.940517902374268


Processing epoch 00:   6%|▌         | 297/4850 [01:20<20:16,  3.74it/s]

Epoch: 1, Loss: 6.9686760902404785


Processing epoch 00:   6%|▌         | 298/4850 [01:20<20:13,  3.75it/s]

Epoch: 1, Loss: 6.732357025146484


Processing epoch 00:   6%|▌         | 299/4850 [01:21<20:19,  3.73it/s]

Epoch: 1, Loss: 7.291172981262207


Processing epoch 00:   6%|▌         | 300/4850 [01:21<20:10,  3.76it/s]

Epoch: 1, Loss: 6.968939781188965


Processing epoch 00:   6%|▌         | 301/4850 [01:21<20:12,  3.75it/s]

Epoch: 1, Loss: 6.695669174194336


Processing epoch 00:   6%|▌         | 302/4850 [01:21<20:08,  3.76it/s]

Epoch: 1, Loss: 6.3637895584106445


Processing epoch 00:   6%|▌         | 303/4850 [01:22<20:14,  3.74it/s]

Epoch: 1, Loss: 6.6975297927856445


Processing epoch 00:   6%|▋         | 304/4850 [01:22<20:12,  3.75it/s]

Epoch: 1, Loss: 6.461000919342041


Processing epoch 00:   6%|▋         | 305/4850 [01:22<20:08,  3.76it/s]

Epoch: 1, Loss: 7.016565322875977


Processing epoch 00:   6%|▋         | 306/4850 [01:22<20:07,  3.76it/s]

Epoch: 1, Loss: 6.743937015533447


Processing epoch 00:   6%|▋         | 307/4850 [01:23<20:03,  3.78it/s]

Epoch: 1, Loss: 6.676710605621338


Processing epoch 00:   6%|▋         | 308/4850 [01:23<19:59,  3.79it/s]

Epoch: 1, Loss: 6.802058219909668


Processing epoch 00:   6%|▋         | 309/4850 [01:23<19:57,  3.79it/s]

Epoch: 1, Loss: 6.924790382385254


Processing epoch 00:   6%|▋         | 310/4850 [01:23<19:57,  3.79it/s]

Epoch: 1, Loss: 6.756929874420166


Processing epoch 00:   6%|▋         | 311/4850 [01:24<20:03,  3.77it/s]

Epoch: 1, Loss: 6.592960357666016


Processing epoch 00:   6%|▋         | 312/4850 [01:24<20:02,  3.77it/s]

Epoch: 1, Loss: 6.267727851867676


Processing epoch 00:   6%|▋         | 313/4850 [01:24<20:01,  3.77it/s]

Epoch: 1, Loss: 6.8381476402282715


Processing epoch 00:   6%|▋         | 314/4850 [01:25<20:00,  3.78it/s]

Epoch: 1, Loss: 6.9678730964660645


Processing epoch 00:   6%|▋         | 315/4850 [01:25<19:54,  3.80it/s]

Epoch: 1, Loss: 6.831047058105469


Processing epoch 00:   7%|▋         | 316/4850 [01:25<19:55,  3.79it/s]

Epoch: 1, Loss: 7.019346714019775


Processing epoch 00:   7%|▋         | 317/4850 [01:25<19:54,  3.80it/s]

Epoch: 1, Loss: 6.624548435211182


Processing epoch 00:   7%|▋         | 318/4850 [01:26<19:53,  3.80it/s]

Epoch: 1, Loss: 7.208768367767334


Processing epoch 00:   7%|▋         | 319/4850 [01:26<19:53,  3.80it/s]

Epoch: 1, Loss: 7.176026344299316


Processing epoch 00:   7%|▋         | 320/4850 [01:26<19:51,  3.80it/s]

Epoch: 1, Loss: 6.478596210479736


Processing epoch 00:   7%|▋         | 321/4850 [01:26<19:50,  3.80it/s]

Epoch: 1, Loss: 6.844243049621582


Processing epoch 00:   7%|▋         | 322/4850 [01:27<19:53,  3.79it/s]

Epoch: 1, Loss: 6.2764692306518555


Processing epoch 00:   7%|▋         | 323/4850 [01:27<19:51,  3.80it/s]

Epoch: 1, Loss: 7.229648590087891


Processing epoch 00:   7%|▋         | 324/4850 [01:27<19:53,  3.79it/s]

Epoch: 1, Loss: 6.730330467224121


Processing epoch 00:   7%|▋         | 325/4850 [01:27<19:58,  3.77it/s]

Epoch: 1, Loss: 6.886088848114014


Processing epoch 00:   7%|▋         | 326/4850 [01:28<20:13,  3.73it/s]

Epoch: 1, Loss: 6.606784820556641


Processing epoch 00:   7%|▋         | 327/4850 [01:28<20:02,  3.76it/s]

Epoch: 1, Loss: 7.254777431488037


Processing epoch 00:   7%|▋         | 328/4850 [01:28<20:14,  3.72it/s]

Epoch: 1, Loss: 6.757995128631592


Processing epoch 00:   7%|▋         | 329/4850 [01:29<20:26,  3.69it/s]

Epoch: 1, Loss: 7.158777713775635


Processing epoch 00:   7%|▋         | 330/4850 [01:29<20:56,  3.60it/s]

Epoch: 1, Loss: 7.142692565917969


Processing epoch 00:   7%|▋         | 331/4850 [01:29<20:56,  3.60it/s]

Epoch: 1, Loss: 6.8412370681762695


Processing epoch 00:   7%|▋         | 332/4850 [01:29<20:54,  3.60it/s]

Epoch: 1, Loss: 6.993636608123779


Processing epoch 00:   7%|▋         | 333/4850 [01:30<21:02,  3.58it/s]

Epoch: 1, Loss: 6.539748191833496


Processing epoch 00:   7%|▋         | 334/4850 [01:30<21:26,  3.51it/s]

Epoch: 1, Loss: 5.918283462524414


Processing epoch 00:   7%|▋         | 335/4850 [01:30<21:28,  3.50it/s]

Epoch: 1, Loss: 6.528034687042236


Processing epoch 00:   7%|▋         | 336/4850 [01:31<21:27,  3.50it/s]

Epoch: 1, Loss: 6.775671005249023


Processing epoch 00:   7%|▋         | 337/4850 [01:31<21:25,  3.51it/s]

Epoch: 1, Loss: 6.982405662536621


Processing epoch 00:   7%|▋         | 338/4850 [01:31<21:00,  3.58it/s]

Epoch: 1, Loss: 6.5573320388793945


Processing epoch 00:   7%|▋         | 339/4850 [01:31<20:39,  3.64it/s]

Epoch: 1, Loss: 7.026671409606934


Processing epoch 00:   7%|▋         | 340/4850 [01:32<20:23,  3.69it/s]

Epoch: 1, Loss: 7.010607719421387


Processing epoch 00:   7%|▋         | 341/4850 [01:32<20:17,  3.70it/s]

Epoch: 1, Loss: 6.6232757568359375


Processing epoch 00:   7%|▋         | 342/4850 [01:32<20:07,  3.73it/s]

Epoch: 1, Loss: 6.650060653686523


Processing epoch 00:   7%|▋         | 343/4850 [01:32<20:08,  3.73it/s]

Epoch: 1, Loss: 6.570985794067383


Processing epoch 00:   7%|▋         | 344/4850 [01:33<20:06,  3.73it/s]

Epoch: 1, Loss: 6.626639366149902


Processing epoch 00:   7%|▋         | 345/4850 [01:33<20:17,  3.70it/s]

Epoch: 1, Loss: 6.9482011795043945


Processing epoch 00:   7%|▋         | 346/4850 [01:33<20:09,  3.72it/s]

Epoch: 1, Loss: 6.7971014976501465


Processing epoch 00:   7%|▋         | 347/4850 [01:33<20:02,  3.74it/s]

Epoch: 1, Loss: 6.691936492919922


Processing epoch 00:   7%|▋         | 348/4850 [01:34<19:57,  3.76it/s]

Epoch: 1, Loss: 6.918668270111084


Processing epoch 00:   7%|▋         | 349/4850 [01:34<19:57,  3.76it/s]

Epoch: 1, Loss: 7.011847496032715


Processing epoch 00:   7%|▋         | 350/4850 [01:34<19:59,  3.75it/s]

Epoch: 1, Loss: 6.801395416259766


Processing epoch 00:   7%|▋         | 351/4850 [01:35<20:02,  3.74it/s]

Epoch: 1, Loss: 6.877778053283691


Processing epoch 00:   7%|▋         | 352/4850 [01:35<20:01,  3.74it/s]

Epoch: 1, Loss: 6.956264495849609


Processing epoch 00:   7%|▋         | 353/4850 [01:35<19:59,  3.75it/s]

Epoch: 1, Loss: 6.147153377532959


Processing epoch 00:   7%|▋         | 354/4850 [01:35<19:52,  3.77it/s]

Epoch: 1, Loss: 6.983187198638916


Processing epoch 00:   7%|▋         | 355/4850 [01:36<19:55,  3.76it/s]

Epoch: 1, Loss: 6.243756294250488


Processing epoch 00:   7%|▋         | 356/4850 [01:36<19:51,  3.77it/s]

Epoch: 1, Loss: 6.752459526062012


Processing epoch 00:   7%|▋         | 357/4850 [01:36<19:53,  3.76it/s]

Epoch: 1, Loss: 6.639500617980957


Processing epoch 00:   7%|▋         | 358/4850 [01:36<19:56,  3.75it/s]

Epoch: 1, Loss: 6.735391616821289


Processing epoch 00:   7%|▋         | 359/4850 [01:37<19:56,  3.75it/s]

Epoch: 1, Loss: 6.223943710327148


Processing epoch 00:   7%|▋         | 360/4850 [01:37<19:57,  3.75it/s]

Epoch: 1, Loss: 6.896893501281738


Processing epoch 00:   7%|▋         | 361/4850 [01:37<19:58,  3.75it/s]

Epoch: 1, Loss: 6.268362998962402


Processing epoch 00:   7%|▋         | 362/4850 [01:37<19:57,  3.75it/s]

Epoch: 1, Loss: 6.678737163543701


Processing epoch 00:   7%|▋         | 363/4850 [01:38<19:58,  3.74it/s]

Epoch: 1, Loss: 6.763072490692139


Processing epoch 00:   8%|▊         | 364/4850 [01:38<20:03,  3.73it/s]

Epoch: 1, Loss: 6.5037150382995605


Processing epoch 00:   8%|▊         | 365/4850 [01:38<19:59,  3.74it/s]

Epoch: 1, Loss: 6.686618804931641


Processing epoch 00:   8%|▊         | 366/4850 [01:39<19:57,  3.75it/s]

Epoch: 1, Loss: 6.696954250335693


Processing epoch 00:   8%|▊         | 367/4850 [01:39<19:52,  3.76it/s]

Epoch: 1, Loss: 6.75935173034668


Processing epoch 00:   8%|▊         | 368/4850 [01:39<19:54,  3.75it/s]

Epoch: 1, Loss: 6.970541954040527


Processing epoch 00:   8%|▊         | 369/4850 [01:39<19:52,  3.76it/s]

Epoch: 1, Loss: 6.501166820526123


Processing epoch 00:   8%|▊         | 370/4850 [01:40<19:56,  3.75it/s]

Epoch: 1, Loss: 6.725162029266357


Processing epoch 00:   8%|▊         | 371/4850 [01:40<19:57,  3.74it/s]

Epoch: 1, Loss: 6.717660903930664


Processing epoch 00:   8%|▊         | 372/4850 [01:40<20:00,  3.73it/s]

Epoch: 1, Loss: 6.533964157104492


Processing epoch 00:   8%|▊         | 373/4850 [01:40<19:58,  3.74it/s]

Epoch: 1, Loss: 6.914837837219238


Processing epoch 00:   8%|▊         | 374/4850 [01:41<19:56,  3.74it/s]

Epoch: 1, Loss: 6.64276647567749


Processing epoch 00:   8%|▊         | 375/4850 [01:41<20:24,  3.66it/s]

Epoch: 1, Loss: 6.3715596199035645


Processing epoch 00:   8%|▊         | 376/4850 [01:41<20:39,  3.61it/s]

Epoch: 1, Loss: 5.795367240905762


Processing epoch 00:   8%|▊         | 377/4850 [01:42<20:44,  3.59it/s]

Epoch: 1, Loss: 7.178276062011719


Processing epoch 00:   8%|▊         | 378/4850 [01:42<20:55,  3.56it/s]

Epoch: 1, Loss: 6.754059314727783


Processing epoch 00:   8%|▊         | 379/4850 [01:42<20:57,  3.56it/s]

Epoch: 1, Loss: 6.467754364013672


Processing epoch 00:   8%|▊         | 380/4850 [01:42<21:07,  3.53it/s]

Epoch: 1, Loss: 6.71727180480957


Processing epoch 00:   8%|▊         | 381/4850 [01:43<21:35,  3.45it/s]

Epoch: 1, Loss: 6.8185858726501465


Processing epoch 00:   8%|▊         | 382/4850 [01:43<21:31,  3.46it/s]

Epoch: 1, Loss: 6.486780166625977


Processing epoch 00:   8%|▊         | 383/4850 [01:43<21:50,  3.41it/s]

Epoch: 1, Loss: 6.65915584564209


Processing epoch 00:   8%|▊         | 384/4850 [01:44<21:09,  3.52it/s]

Epoch: 1, Loss: 6.725957870483398


Processing epoch 00:   8%|▊         | 385/4850 [01:44<20:53,  3.56it/s]

Epoch: 1, Loss: 6.930142402648926


Processing epoch 00:   8%|▊         | 386/4850 [01:44<20:33,  3.62it/s]

Epoch: 1, Loss: 6.668503284454346


Processing epoch 00:   8%|▊         | 387/4850 [01:44<20:16,  3.67it/s]

Epoch: 1, Loss: 6.491428852081299


Processing epoch 00:   8%|▊         | 388/4850 [01:45<20:07,  3.69it/s]

Epoch: 1, Loss: 6.997813701629639


Processing epoch 00:   8%|▊         | 389/4850 [01:45<19:57,  3.73it/s]

Epoch: 1, Loss: 6.7723388671875


Processing epoch 00:   8%|▊         | 390/4850 [01:45<19:54,  3.73it/s]

Epoch: 1, Loss: 7.164061546325684


Processing epoch 00:   8%|▊         | 391/4850 [01:45<19:56,  3.73it/s]

Epoch: 1, Loss: 6.6975297927856445


Processing epoch 00:   8%|▊         | 392/4850 [01:46<19:56,  3.73it/s]

Epoch: 1, Loss: 6.761144161224365


Processing epoch 00:   8%|▊         | 393/4850 [01:46<19:54,  3.73it/s]

Epoch: 1, Loss: 6.612074375152588


Processing epoch 00:   8%|▊         | 394/4850 [01:46<19:52,  3.74it/s]

Epoch: 1, Loss: 6.746543884277344


Processing epoch 00:   8%|▊         | 395/4850 [01:46<19:45,  3.76it/s]

Epoch: 1, Loss: 6.782766819000244


Processing epoch 00:   8%|▊         | 396/4850 [01:47<19:43,  3.76it/s]

Epoch: 1, Loss: 6.558817386627197


Processing epoch 00:   8%|▊         | 397/4850 [01:47<19:52,  3.73it/s]

Epoch: 1, Loss: 6.530879020690918


Processing epoch 00:   8%|▊         | 398/4850 [01:47<19:51,  3.74it/s]

Epoch: 1, Loss: 6.883269786834717


Processing epoch 00:   8%|▊         | 399/4850 [01:48<19:50,  3.74it/s]

Epoch: 1, Loss: 7.191773414611816


Processing epoch 00:   8%|▊         | 400/4850 [01:48<19:50,  3.74it/s]

Epoch: 1, Loss: 6.382676601409912


Processing epoch 00:   8%|▊         | 401/4850 [01:48<19:50,  3.74it/s]

Epoch: 1, Loss: 6.604036808013916


Processing epoch 00:   8%|▊         | 402/4850 [01:48<19:54,  3.72it/s]

Epoch: 1, Loss: 6.6913909912109375


Processing epoch 00:   8%|▊         | 403/4850 [01:49<19:55,  3.72it/s]

Epoch: 1, Loss: 6.814080238342285


Processing epoch 00:   8%|▊         | 404/4850 [01:49<19:51,  3.73it/s]

Epoch: 1, Loss: 6.303152561187744


Processing epoch 00:   8%|▊         | 405/4850 [01:49<19:43,  3.76it/s]

Epoch: 1, Loss: 6.857854843139648


Processing epoch 00:   8%|▊         | 406/4850 [01:49<19:50,  3.73it/s]

Epoch: 1, Loss: 6.7923903465271


Processing epoch 00:   8%|▊         | 407/4850 [01:50<19:48,  3.74it/s]

Epoch: 1, Loss: 7.081295490264893


Processing epoch 00:   8%|▊         | 408/4850 [01:50<19:47,  3.74it/s]

Epoch: 1, Loss: 6.354526996612549


Processing epoch 00:   8%|▊         | 409/4850 [01:50<19:50,  3.73it/s]

Epoch: 1, Loss: 6.280269145965576


Processing epoch 00:   8%|▊         | 410/4850 [01:50<19:50,  3.73it/s]

Epoch: 1, Loss: 6.579552173614502


Processing epoch 00:   8%|▊         | 411/4850 [01:51<19:58,  3.70it/s]

Epoch: 1, Loss: 6.8420233726501465


Processing epoch 00:   8%|▊         | 412/4850 [01:51<20:06,  3.68it/s]

Epoch: 1, Loss: 6.61919641494751


Processing epoch 00:   9%|▊         | 413/4850 [01:51<19:58,  3.70it/s]

Epoch: 1, Loss: 6.122818470001221


Processing epoch 00:   9%|▊         | 414/4850 [01:52<19:52,  3.72it/s]

Epoch: 1, Loss: 6.283019065856934


Processing epoch 00:   9%|▊         | 415/4850 [01:52<19:49,  3.73it/s]

Epoch: 1, Loss: 6.491989612579346


Processing epoch 00:   9%|▊         | 416/4850 [01:52<19:49,  3.73it/s]

Epoch: 1, Loss: 6.470137119293213


Processing epoch 00:   9%|▊         | 417/4850 [01:52<19:46,  3.73it/s]

Epoch: 1, Loss: 6.768606185913086


Processing epoch 00:   9%|▊         | 418/4850 [01:53<19:46,  3.74it/s]

Epoch: 1, Loss: 6.581204414367676


Processing epoch 00:   9%|▊         | 419/4850 [01:53<19:45,  3.74it/s]

Epoch: 1, Loss: 6.492724418640137


Processing epoch 00:   9%|▊         | 420/4850 [01:53<19:41,  3.75it/s]

Epoch: 1, Loss: 6.490358829498291


Processing epoch 00:   9%|▊         | 421/4850 [01:53<19:47,  3.73it/s]

Epoch: 1, Loss: 6.8634843826293945


Processing epoch 00:   9%|▊         | 422/4850 [01:54<20:14,  3.65it/s]

Epoch: 1, Loss: 6.2747297286987305


Processing epoch 00:   9%|▊         | 423/4850 [01:54<20:36,  3.58it/s]

Epoch: 1, Loss: 6.677820205688477


Processing epoch 00:   9%|▊         | 424/4850 [01:54<20:36,  3.58it/s]

Epoch: 1, Loss: 6.329658031463623


Processing epoch 00:   9%|▉         | 425/4850 [01:55<20:50,  3.54it/s]

Epoch: 1, Loss: 6.916622638702393


Processing epoch 00:   9%|▉         | 426/4850 [01:55<20:59,  3.51it/s]

Epoch: 1, Loss: 6.381422519683838


Processing epoch 00:   9%|▉         | 427/4850 [01:55<21:06,  3.49it/s]

Epoch: 1, Loss: 6.315703868865967


Processing epoch 00:   9%|▉         | 428/4850 [01:55<21:04,  3.50it/s]

Epoch: 1, Loss: 6.971264362335205


Processing epoch 00:   9%|▉         | 429/4850 [01:56<20:59,  3.51it/s]

Epoch: 1, Loss: 6.91533899307251


Processing epoch 00:   9%|▉         | 430/4850 [01:56<20:59,  3.51it/s]

Epoch: 1, Loss: 6.649893283843994


Processing epoch 00:   9%|▉         | 431/4850 [01:56<20:34,  3.58it/s]

Epoch: 1, Loss: 6.454580783843994


Processing epoch 00:   9%|▉         | 432/4850 [01:57<20:18,  3.63it/s]

Epoch: 1, Loss: 6.783169746398926


Processing epoch 00:   9%|▉         | 433/4850 [01:57<20:07,  3.66it/s]

Epoch: 1, Loss: 6.386552810668945


Processing epoch 00:   9%|▉         | 434/4850 [01:57<20:01,  3.68it/s]

Epoch: 1, Loss: 6.870972633361816


Processing epoch 00:   9%|▉         | 435/4850 [01:57<19:55,  3.69it/s]

Epoch: 1, Loss: 6.784426689147949


Processing epoch 00:   9%|▉         | 436/4850 [01:58<19:58,  3.68it/s]

Epoch: 1, Loss: 6.337791442871094


Processing epoch 00:   9%|▉         | 437/4850 [01:58<19:54,  3.70it/s]

Epoch: 1, Loss: 6.5978684425354


Processing epoch 00:   9%|▉         | 438/4850 [01:58<19:46,  3.72it/s]

Epoch: 1, Loss: 6.321106910705566


Processing epoch 00:   9%|▉         | 439/4850 [01:58<19:40,  3.74it/s]

Epoch: 1, Loss: 7.00265645980835


Processing epoch 00:   9%|▉         | 440/4850 [01:59<19:44,  3.72it/s]

Epoch: 1, Loss: 7.051019668579102


Processing epoch 00:   9%|▉         | 441/4850 [01:59<19:44,  3.72it/s]

Epoch: 1, Loss: 6.563621520996094


Processing epoch 00:   9%|▉         | 442/4850 [01:59<19:43,  3.72it/s]

Epoch: 1, Loss: 6.775327682495117


Processing epoch 00:   9%|▉         | 443/4850 [02:00<19:42,  3.73it/s]

Epoch: 1, Loss: 6.615561485290527


Processing epoch 00:   9%|▉         | 444/4850 [02:00<19:44,  3.72it/s]

Epoch: 1, Loss: 6.403529167175293


Processing epoch 00:   9%|▉         | 445/4850 [02:00<19:45,  3.72it/s]

Epoch: 1, Loss: 6.962216854095459


Processing epoch 00:   9%|▉         | 446/4850 [02:00<19:45,  3.71it/s]

Epoch: 1, Loss: 6.43576717376709


Processing epoch 00:   9%|▉         | 447/4850 [02:01<19:42,  3.72it/s]

Epoch: 1, Loss: 6.8656792640686035


Processing epoch 00:   9%|▉         | 448/4850 [02:01<19:48,  3.70it/s]

Epoch: 1, Loss: 5.971627235412598


Processing epoch 00:   9%|▉         | 449/4850 [02:01<19:48,  3.70it/s]

Epoch: 1, Loss: 6.27180290222168


Processing epoch 00:   9%|▉         | 450/4850 [02:01<19:44,  3.72it/s]

Epoch: 1, Loss: 6.422509670257568


Processing epoch 00:   9%|▉         | 451/4850 [02:02<19:40,  3.73it/s]

Epoch: 1, Loss: 7.071380138397217


Processing epoch 00:   9%|▉         | 452/4850 [02:02<19:41,  3.72it/s]

Epoch: 1, Loss: 6.56235408782959


Processing epoch 00:   9%|▉         | 453/4850 [02:02<19:41,  3.72it/s]

Epoch: 1, Loss: 5.900661945343018


Processing epoch 00:   9%|▉         | 454/4850 [02:02<19:41,  3.72it/s]

Epoch: 1, Loss: 6.4740777015686035


Processing epoch 00:   9%|▉         | 455/4850 [02:03<19:45,  3.71it/s]

Epoch: 1, Loss: 6.872342109680176


Processing epoch 00:   9%|▉         | 456/4850 [02:03<19:37,  3.73it/s]

Epoch: 1, Loss: 6.678248405456543


Processing epoch 00:   9%|▉         | 457/4850 [02:03<19:34,  3.74it/s]

Epoch: 1, Loss: 6.681035995483398


Processing epoch 00:   9%|▉         | 458/4850 [02:04<19:32,  3.75it/s]

Epoch: 1, Loss: 6.651089668273926


Processing epoch 00:   9%|▉         | 459/4850 [02:04<19:42,  3.71it/s]

Epoch: 1, Loss: 6.3022871017456055


Processing epoch 00:   9%|▉         | 460/4850 [02:04<19:40,  3.72it/s]

Epoch: 1, Loss: 6.667065620422363


Processing epoch 00:  10%|▉         | 461/4850 [02:04<19:36,  3.73it/s]

Epoch: 1, Loss: 6.391502380371094


Processing epoch 00:  10%|▉         | 462/4850 [02:05<19:35,  3.73it/s]

Epoch: 1, Loss: 6.856851100921631


Processing epoch 00:  10%|▉         | 463/4850 [02:05<19:37,  3.72it/s]

Epoch: 1, Loss: 6.439230442047119


Processing epoch 00:  10%|▉         | 464/4850 [02:05<19:35,  3.73it/s]

Epoch: 1, Loss: 6.8274641036987305


Processing epoch 00:  10%|▉         | 465/4850 [02:05<19:35,  3.73it/s]

Epoch: 1, Loss: 6.254465103149414


Processing epoch 00:  10%|▉         | 466/4850 [02:06<19:30,  3.74it/s]

Epoch: 1, Loss: 6.976607322692871


Processing epoch 00:  10%|▉         | 467/4850 [02:06<19:26,  3.76it/s]

Epoch: 1, Loss: 6.874812126159668


Processing epoch 00:  10%|▉         | 468/4850 [02:06<19:47,  3.69it/s]

Epoch: 1, Loss: 6.234042644500732


Processing epoch 00:  10%|▉         | 469/4850 [02:07<20:00,  3.65it/s]

Epoch: 1, Loss: 6.664936065673828


Processing epoch 00:  10%|▉         | 470/4850 [02:07<20:08,  3.62it/s]

Epoch: 1, Loss: 6.629744529724121


Processing epoch 00:  10%|▉         | 471/4850 [02:07<20:17,  3.60it/s]

Epoch: 1, Loss: 6.381010055541992


Processing epoch 00:  10%|▉         | 472/4850 [02:07<20:20,  3.59it/s]

Epoch: 1, Loss: 7.162251949310303


Processing epoch 00:  10%|▉         | 473/4850 [02:08<20:29,  3.56it/s]

Epoch: 1, Loss: 6.691091537475586


Processing epoch 00:  10%|▉         | 474/4850 [02:08<20:47,  3.51it/s]

Epoch: 1, Loss: 6.582881927490234


Processing epoch 00:  10%|▉         | 475/4850 [02:08<20:48,  3.50it/s]

Epoch: 1, Loss: 6.123562812805176


Processing epoch 00:  10%|▉         | 476/4850 [02:09<21:00,  3.47it/s]

Epoch: 1, Loss: 5.917191982269287


Processing epoch 00:  10%|▉         | 477/4850 [02:09<20:30,  3.56it/s]

Epoch: 1, Loss: 6.458300590515137


Processing epoch 00:  10%|▉         | 478/4850 [02:09<20:11,  3.61it/s]

Epoch: 1, Loss: 6.894603729248047


Processing epoch 00:  10%|▉         | 479/4850 [02:09<20:01,  3.64it/s]

Epoch: 1, Loss: 6.860410690307617


Processing epoch 00:  10%|▉         | 480/4850 [02:10<19:49,  3.67it/s]

Epoch: 1, Loss: 6.15621280670166


Processing epoch 00:  10%|▉         | 481/4850 [02:10<19:42,  3.69it/s]

Epoch: 1, Loss: 6.926743984222412


Processing epoch 00:  10%|▉         | 482/4850 [02:10<19:34,  3.72it/s]

Epoch: 1, Loss: 6.590271472930908


Processing epoch 00:  10%|▉         | 483/4850 [02:10<19:31,  3.73it/s]

Epoch: 1, Loss: 6.803526401519775


Processing epoch 00:  10%|▉         | 484/4850 [02:11<19:26,  3.74it/s]

Epoch: 1, Loss: 6.700450897216797


Processing epoch 00:  10%|█         | 485/4850 [02:11<19:27,  3.74it/s]

Epoch: 1, Loss: 6.437173843383789


Processing epoch 00:  10%|█         | 486/4850 [02:11<19:26,  3.74it/s]

Epoch: 1, Loss: 6.863775253295898


Processing epoch 00:  10%|█         | 487/4850 [02:11<19:28,  3.74it/s]

Epoch: 1, Loss: 6.823920249938965


Processing epoch 00:  10%|█         | 488/4850 [02:12<19:24,  3.75it/s]

Epoch: 1, Loss: 6.2926201820373535


Processing epoch 00:  10%|█         | 489/4850 [02:12<19:25,  3.74it/s]

Epoch: 1, Loss: 6.274101734161377


Processing epoch 00:  10%|█         | 490/4850 [02:12<19:24,  3.74it/s]

Epoch: 1, Loss: 6.7999396324157715


Processing epoch 00:  10%|█         | 491/4850 [02:13<19:22,  3.75it/s]

Epoch: 1, Loss: 5.936610221862793


Processing epoch 00:  10%|█         | 492/4850 [02:13<19:23,  3.75it/s]

Epoch: 1, Loss: 5.801850318908691


Processing epoch 00:  10%|█         | 493/4850 [02:13<19:31,  3.72it/s]

Epoch: 1, Loss: 6.282659530639648


Processing epoch 00:  10%|█         | 494/4850 [02:13<19:28,  3.73it/s]

Epoch: 1, Loss: 6.847174644470215


Processing epoch 00:  10%|█         | 495/4850 [02:14<19:26,  3.73it/s]

Epoch: 1, Loss: 6.5779805183410645


Processing epoch 00:  10%|█         | 496/4850 [02:14<19:22,  3.75it/s]

Epoch: 1, Loss: 6.489502906799316


Processing epoch 00:  10%|█         | 497/4850 [02:14<19:20,  3.75it/s]

Epoch: 1, Loss: 6.855790615081787


Processing epoch 00:  10%|█         | 498/4850 [02:14<19:22,  3.74it/s]

Epoch: 1, Loss: 6.675825119018555


Processing epoch 00:  10%|█         | 499/4850 [02:15<19:22,  3.74it/s]

Epoch: 1, Loss: 6.635277271270752


Processing epoch 00:  10%|█         | 500/4850 [02:15<19:22,  3.74it/s]

Epoch: 1, Loss: 7.02377462387085


Processing epoch 00:  10%|█         | 501/4850 [02:15<19:26,  3.73it/s]

Epoch: 1, Loss: 6.6158037185668945


Processing epoch 00:  10%|█         | 502/4850 [02:15<19:22,  3.74it/s]

Epoch: 1, Loss: 6.9663286209106445


Processing epoch 00:  10%|█         | 503/4850 [02:16<19:24,  3.73it/s]

Epoch: 1, Loss: 6.244533061981201


Processing epoch 00:  10%|█         | 504/4850 [02:16<19:24,  3.73it/s]

Epoch: 1, Loss: 6.595848083496094


Processing epoch 00:  10%|█         | 505/4850 [02:16<19:25,  3.73it/s]

Epoch: 1, Loss: 6.3118391036987305


Processing epoch 00:  10%|█         | 506/4850 [02:17<19:27,  3.72it/s]

Epoch: 1, Loss: 6.29257345199585


Processing epoch 00:  10%|█         | 507/4850 [02:17<19:23,  3.73it/s]

Epoch: 1, Loss: 7.0092267990112305


Processing epoch 00:  10%|█         | 508/4850 [02:17<19:23,  3.73it/s]

Epoch: 1, Loss: 6.373203277587891


Processing epoch 00:  10%|█         | 509/4850 [02:17<19:29,  3.71it/s]

Epoch: 1, Loss: 6.527169227600098


Processing epoch 00:  11%|█         | 510/4850 [02:18<19:23,  3.73it/s]

Epoch: 1, Loss: 6.854794502258301


Processing epoch 00:  11%|█         | 511/4850 [02:18<19:24,  3.73it/s]

Epoch: 1, Loss: 6.404118061065674


Processing epoch 00:  11%|█         | 512/4850 [02:18<19:26,  3.72it/s]

Epoch: 1, Loss: 6.89675760269165


Processing epoch 00:  11%|█         | 513/4850 [02:18<19:25,  3.72it/s]

Epoch: 1, Loss: 6.305935859680176


Processing epoch 00:  11%|█         | 514/4850 [02:19<19:46,  3.65it/s]

Epoch: 1, Loss: 6.602515697479248


Processing epoch 00:  11%|█         | 515/4850 [02:19<19:58,  3.62it/s]

Epoch: 1, Loss: 6.715880870819092


Processing epoch 00:  11%|█         | 516/4850 [02:19<20:03,  3.60it/s]

Epoch: 1, Loss: 6.6993021965026855


Processing epoch 00:  11%|█         | 517/4850 [02:20<20:07,  3.59it/s]

Epoch: 1, Loss: 7.112743854522705


Processing epoch 00:  11%|█         | 518/4850 [02:20<20:10,  3.58it/s]

Epoch: 1, Loss: 6.739628314971924


Processing epoch 00:  11%|█         | 519/4850 [02:20<20:13,  3.57it/s]

Epoch: 1, Loss: 6.607167720794678


Processing epoch 00:  11%|█         | 520/4850 [02:20<20:22,  3.54it/s]

Epoch: 1, Loss: 7.076972961425781


Processing epoch 00:  11%|█         | 521/4850 [02:21<20:29,  3.52it/s]

Epoch: 1, Loss: 6.644147872924805


Processing epoch 00:  11%|█         | 522/4850 [02:21<20:34,  3.51it/s]

Epoch: 1, Loss: 6.747215270996094


Processing epoch 00:  11%|█         | 523/4850 [02:21<20:12,  3.57it/s]

Epoch: 1, Loss: 6.2150373458862305


Processing epoch 00:  11%|█         | 524/4850 [02:22<19:54,  3.62it/s]

Epoch: 1, Loss: 5.944024085998535


Processing epoch 00:  11%|█         | 525/4850 [02:22<19:41,  3.66it/s]

Epoch: 1, Loss: 6.634765148162842


Processing epoch 00:  11%|█         | 526/4850 [02:22<19:31,  3.69it/s]

Epoch: 1, Loss: 6.932135105133057


Processing epoch 00:  11%|█         | 527/4850 [02:22<19:37,  3.67it/s]

Epoch: 1, Loss: 6.703488349914551


Processing epoch 00:  11%|█         | 528/4850 [02:23<19:28,  3.70it/s]

Epoch: 1, Loss: 6.734650611877441


Processing epoch 00:  11%|█         | 529/4850 [02:23<19:18,  3.73it/s]

Epoch: 1, Loss: 6.851834297180176


Processing epoch 00:  11%|█         | 530/4850 [02:23<19:17,  3.73it/s]

Epoch: 1, Loss: 6.186145782470703


Processing epoch 00:  11%|█         | 531/4850 [02:23<19:11,  3.75it/s]

Epoch: 1, Loss: 6.5686492919921875


Processing epoch 00:  11%|█         | 532/4850 [02:24<19:09,  3.76it/s]

Epoch: 1, Loss: 6.570611953735352


Processing epoch 00:  11%|█         | 533/4850 [02:24<19:07,  3.76it/s]

Epoch: 1, Loss: 6.4281415939331055


Processing epoch 00:  11%|█         | 534/4850 [02:24<19:11,  3.75it/s]

Epoch: 1, Loss: 6.6672563552856445


Processing epoch 00:  11%|█         | 535/4850 [02:24<19:14,  3.74it/s]

Epoch: 1, Loss: 6.736073970794678


Processing epoch 00:  11%|█         | 536/4850 [02:25<19:22,  3.71it/s]

Epoch: 1, Loss: 6.520699501037598


Processing epoch 00:  11%|█         | 537/4850 [02:25<19:23,  3.71it/s]

Epoch: 1, Loss: 5.975124359130859


Processing epoch 00:  11%|█         | 538/4850 [02:25<19:15,  3.73it/s]

Epoch: 1, Loss: 6.289606094360352


Processing epoch 00:  11%|█         | 539/4850 [02:26<19:10,  3.75it/s]

Epoch: 1, Loss: 6.173393249511719


Processing epoch 00:  11%|█         | 540/4850 [02:26<19:07,  3.76it/s]

Epoch: 1, Loss: 6.626194477081299


Processing epoch 00:  11%|█         | 541/4850 [02:26<19:07,  3.75it/s]

Epoch: 1, Loss: 5.881353378295898


Processing epoch 00:  11%|█         | 542/4850 [02:26<19:04,  3.76it/s]

Epoch: 1, Loss: 6.434053421020508


Processing epoch 00:  11%|█         | 543/4850 [02:27<19:02,  3.77it/s]

Epoch: 1, Loss: 6.447417259216309


Processing epoch 00:  11%|█         | 544/4850 [02:27<19:07,  3.75it/s]

Epoch: 1, Loss: 6.316866874694824


Processing epoch 00:  11%|█         | 545/4850 [02:27<19:10,  3.74it/s]

Epoch: 1, Loss: 6.247250080108643


Processing epoch 00:  11%|█▏        | 546/4850 [02:27<19:07,  3.75it/s]

Epoch: 1, Loss: 6.620306968688965


Processing epoch 00:  11%|█▏        | 547/4850 [02:28<19:07,  3.75it/s]

Epoch: 1, Loss: 6.248140335083008


Processing epoch 00:  11%|█▏        | 548/4850 [02:28<19:03,  3.76it/s]

Epoch: 1, Loss: 6.59299898147583


Processing epoch 00:  11%|█▏        | 549/4850 [02:28<18:57,  3.78it/s]

Epoch: 1, Loss: 6.815435886383057


Processing epoch 00:  11%|█▏        | 550/4850 [02:28<19:08,  3.74it/s]

Epoch: 1, Loss: 6.308343887329102


Processing epoch 00:  11%|█▏        | 551/4850 [02:29<19:07,  3.75it/s]

Epoch: 1, Loss: 6.882843494415283


Processing epoch 00:  11%|█▏        | 552/4850 [02:29<19:10,  3.74it/s]

Epoch: 1, Loss: 6.371002197265625


Processing epoch 00:  11%|█▏        | 553/4850 [02:29<19:08,  3.74it/s]

Epoch: 1, Loss: 6.644813060760498


Processing epoch 00:  11%|█▏        | 554/4850 [02:30<19:13,  3.72it/s]

Epoch: 1, Loss: 6.401871681213379


Processing epoch 00:  11%|█▏        | 555/4850 [02:30<19:11,  3.73it/s]

Epoch: 1, Loss: 6.779883861541748


Processing epoch 00:  11%|█▏        | 556/4850 [02:30<19:14,  3.72it/s]

Epoch: 1, Loss: 6.4373369216918945


Processing epoch 00:  11%|█▏        | 557/4850 [02:30<19:08,  3.74it/s]

Epoch: 1, Loss: 6.303767681121826


Processing epoch 00:  12%|█▏        | 558/4850 [02:31<19:05,  3.75it/s]

Epoch: 1, Loss: 6.114699363708496


Processing epoch 00:  12%|█▏        | 559/4850 [02:31<19:03,  3.75it/s]

Epoch: 1, Loss: 6.523492336273193


Processing epoch 00:  12%|█▏        | 560/4850 [02:31<19:02,  3.75it/s]

Epoch: 1, Loss: 6.3190107345581055


Processing epoch 00:  12%|█▏        | 561/4850 [02:31<19:32,  3.66it/s]

Epoch: 1, Loss: 6.737152576446533


Processing epoch 00:  12%|█▏        | 562/4850 [02:32<19:46,  3.62it/s]

Epoch: 1, Loss: 6.476891994476318


Processing epoch 00:  12%|█▏        | 563/4850 [02:32<19:56,  3.58it/s]

Epoch: 1, Loss: 6.682485580444336


Processing epoch 00:  12%|█▏        | 564/4850 [02:32<20:05,  3.56it/s]

Epoch: 1, Loss: 6.199724197387695


Processing epoch 00:  12%|█▏        | 565/4850 [02:33<20:37,  3.46it/s]

Epoch: 1, Loss: 6.25792121887207


Processing epoch 00:  12%|█▏        | 566/4850 [02:33<20:37,  3.46it/s]

Epoch: 1, Loss: 6.4837212562561035


Processing epoch 00:  12%|█▏        | 567/4850 [02:33<20:41,  3.45it/s]

Epoch: 1, Loss: 6.496730804443359


Processing epoch 00:  12%|█▏        | 568/4850 [02:33<20:35,  3.47it/s]

Epoch: 1, Loss: 6.8983941078186035


Processing epoch 00:  12%|█▏        | 569/4850 [02:34<20:33,  3.47it/s]

Epoch: 1, Loss: 6.431454658508301


Processing epoch 00:  12%|█▏        | 570/4850 [02:34<20:01,  3.56it/s]

Epoch: 1, Loss: 6.9295735359191895


Processing epoch 00:  12%|█▏        | 571/4850 [02:34<19:41,  3.62it/s]

Epoch: 1, Loss: 6.359444618225098


Processing epoch 00:  12%|█▏        | 572/4850 [02:35<19:28,  3.66it/s]

Epoch: 1, Loss: 6.390620231628418


Processing epoch 00:  12%|█▏        | 573/4850 [02:35<19:20,  3.69it/s]

Epoch: 1, Loss: 6.784796237945557


Processing epoch 00:  12%|█▏        | 574/4850 [02:35<19:12,  3.71it/s]

Epoch: 1, Loss: 6.960112571716309


Processing epoch 00:  12%|█▏        | 575/4850 [02:35<19:10,  3.72it/s]

Epoch: 1, Loss: 6.263407230377197


Processing epoch 00:  12%|█▏        | 576/4850 [02:36<19:05,  3.73it/s]

Epoch: 1, Loss: 6.359702110290527


Processing epoch 00:  12%|█▏        | 577/4850 [02:36<19:05,  3.73it/s]

Epoch: 1, Loss: 6.205626964569092


Processing epoch 00:  12%|█▏        | 578/4850 [02:36<19:02,  3.74it/s]

Epoch: 1, Loss: 6.474068641662598


Processing epoch 00:  12%|█▏        | 579/4850 [02:36<18:58,  3.75it/s]

Epoch: 1, Loss: 6.44095516204834


Processing epoch 00:  12%|█▏        | 580/4850 [02:37<18:57,  3.76it/s]

Epoch: 1, Loss: 6.545230865478516


Processing epoch 00:  12%|█▏        | 581/4850 [02:37<18:54,  3.76it/s]

Epoch: 1, Loss: 6.324080467224121


Processing epoch 00:  12%|█▏        | 582/4850 [02:37<18:52,  3.77it/s]

Epoch: 1, Loss: 6.666528224945068


Processing epoch 00:  12%|█▏        | 583/4850 [02:37<18:54,  3.76it/s]

Epoch: 1, Loss: 6.023461818695068


Processing epoch 00:  12%|█▏        | 584/4850 [02:38<18:52,  3.77it/s]

Epoch: 1, Loss: 6.545876979827881


Processing epoch 00:  12%|█▏        | 585/4850 [02:38<18:55,  3.76it/s]

Epoch: 1, Loss: 6.884147644042969


Processing epoch 00:  12%|█▏        | 586/4850 [02:38<18:55,  3.75it/s]

Epoch: 1, Loss: 6.550517559051514


Processing epoch 00:  12%|█▏        | 587/4850 [02:39<18:55,  3.75it/s]

Epoch: 1, Loss: 6.445876121520996


Processing epoch 00:  12%|█▏        | 588/4850 [02:39<18:53,  3.76it/s]

Epoch: 1, Loss: 6.275479316711426


Processing epoch 00:  12%|█▏        | 589/4850 [02:39<18:51,  3.77it/s]

Epoch: 1, Loss: 6.001884460449219


Processing epoch 00:  12%|█▏        | 590/4850 [02:39<18:48,  3.77it/s]

Epoch: 1, Loss: 6.717641353607178


Processing epoch 00:  12%|█▏        | 591/4850 [02:40<18:48,  3.77it/s]

Epoch: 1, Loss: 5.810451507568359


Processing epoch 00:  12%|█▏        | 592/4850 [02:40<18:53,  3.76it/s]

Epoch: 1, Loss: 6.461491584777832


Processing epoch 00:  12%|█▏        | 593/4850 [02:40<18:56,  3.75it/s]

Epoch: 1, Loss: 5.93902063369751


Processing epoch 00:  12%|█▏        | 594/4850 [02:40<18:58,  3.74it/s]

Epoch: 1, Loss: 6.88817834854126


Processing epoch 00:  12%|█▏        | 595/4850 [02:41<18:59,  3.73it/s]

Epoch: 1, Loss: 5.868978500366211


Processing epoch 00:  12%|█▏        | 596/4850 [02:41<19:07,  3.71it/s]

Epoch: 1, Loss: 6.2405524253845215


Processing epoch 00:  12%|█▏        | 597/4850 [02:41<19:03,  3.72it/s]

Epoch: 1, Loss: 6.372178077697754


Processing epoch 00:  12%|█▏        | 598/4850 [02:41<19:02,  3.72it/s]

Epoch: 1, Loss: 6.332131385803223


Processing epoch 00:  12%|█▏        | 599/4850 [02:42<19:02,  3.72it/s]

Epoch: 1, Loss: 6.303668022155762


Processing epoch 00:  12%|█▏        | 600/4850 [02:42<18:56,  3.74it/s]

Epoch: 1, Loss: 6.719712257385254


Processing epoch 00:  12%|█▏        | 601/4850 [02:42<18:55,  3.74it/s]

Epoch: 1, Loss: 6.361541748046875


Processing epoch 00:  12%|█▏        | 602/4850 [02:43<18:48,  3.76it/s]

Epoch: 1, Loss: 6.472840309143066


Processing epoch 00:  12%|█▏        | 603/4850 [02:43<18:52,  3.75it/s]

Epoch: 1, Loss: 6.398598670959473


Processing epoch 00:  12%|█▏        | 604/4850 [02:43<18:53,  3.75it/s]

Epoch: 1, Loss: 6.702930927276611


Processing epoch 00:  12%|█▏        | 605/4850 [02:43<18:51,  3.75it/s]

Epoch: 1, Loss: 6.06644868850708


Processing epoch 00:  12%|█▏        | 606/4850 [02:44<18:51,  3.75it/s]

Epoch: 1, Loss: 6.919898986816406


Processing epoch 00:  13%|█▎        | 607/4850 [02:44<19:16,  3.67it/s]

Epoch: 1, Loss: 5.895569324493408


Processing epoch 00:  13%|█▎        | 608/4850 [02:44<19:26,  3.64it/s]

Epoch: 1, Loss: 6.60446834564209


Processing epoch 00:  13%|█▎        | 609/4850 [02:44<19:45,  3.58it/s]

Epoch: 1, Loss: 6.3558573722839355


Processing epoch 00:  13%|█▎        | 610/4850 [02:45<19:48,  3.57it/s]

Epoch: 1, Loss: 6.330057144165039


Processing epoch 00:  13%|█▎        | 611/4850 [02:45<20:04,  3.52it/s]

Epoch: 1, Loss: 6.249248504638672


Processing epoch 00:  13%|█▎        | 612/4850 [02:45<20:14,  3.49it/s]

Epoch: 1, Loss: 5.662086009979248


Processing epoch 00:  13%|█▎        | 613/4850 [02:46<20:28,  3.45it/s]

Epoch: 1, Loss: 6.224666118621826


Processing epoch 00:  13%|█▎        | 614/4850 [02:46<20:20,  3.47it/s]

Epoch: 1, Loss: 5.979988098144531


Processing epoch 00:  13%|█▎        | 615/4850 [02:46<20:12,  3.49it/s]

Epoch: 1, Loss: 6.642058372497559


Processing epoch 00:  13%|█▎        | 616/4850 [02:46<20:01,  3.52it/s]

Epoch: 1, Loss: 6.343278884887695


Processing epoch 00:  13%|█▎        | 617/4850 [02:47<19:44,  3.57it/s]

Epoch: 1, Loss: 5.958731174468994


Processing epoch 00:  13%|█▎        | 618/4850 [02:47<19:30,  3.62it/s]

Epoch: 1, Loss: 5.969202995300293


Processing epoch 00:  13%|█▎        | 619/4850 [02:47<19:18,  3.65it/s]

Epoch: 1, Loss: 5.89870023727417


Processing epoch 00:  13%|█▎        | 620/4850 [02:48<19:18,  3.65it/s]

Epoch: 1, Loss: 6.10145378112793


Processing epoch 00:  13%|█▎        | 621/4850 [02:48<19:08,  3.68it/s]

Epoch: 1, Loss: 6.022271633148193


Processing epoch 00:  13%|█▎        | 622/4850 [02:48<19:02,  3.70it/s]

Epoch: 1, Loss: 5.470555305480957


Processing epoch 00:  13%|█▎        | 623/4850 [02:48<18:48,  3.75it/s]

Epoch: 1, Loss: 6.998174667358398


Processing epoch 00:  13%|█▎        | 624/4850 [02:49<18:47,  3.75it/s]

Epoch: 1, Loss: 5.6404500007629395


Processing epoch 00:  13%|█▎        | 625/4850 [02:49<18:44,  3.76it/s]

Epoch: 1, Loss: 5.440495014190674


Processing epoch 00:  13%|█▎        | 626/4850 [02:49<18:47,  3.75it/s]

Epoch: 1, Loss: 6.1209635734558105


Processing epoch 00:  13%|█▎        | 627/4850 [02:49<18:48,  3.74it/s]

Epoch: 1, Loss: 6.63785982131958


Processing epoch 00:  13%|█▎        | 628/4850 [02:50<18:49,  3.74it/s]

Epoch: 1, Loss: 5.321812152862549


Processing epoch 00:  13%|█▎        | 629/4850 [02:50<18:48,  3.74it/s]

Epoch: 1, Loss: 5.87686824798584


Processing epoch 00:  13%|█▎        | 630/4850 [02:50<18:55,  3.72it/s]

Epoch: 1, Loss: 6.380162715911865


Processing epoch 00:  13%|█▎        | 631/4850 [02:50<18:50,  3.73it/s]

Epoch: 1, Loss: 6.596479415893555


Processing epoch 00:  13%|█▎        | 632/4850 [02:51<18:49,  3.73it/s]

Epoch: 1, Loss: 6.739082336425781


Processing epoch 00:  13%|█▎        | 633/4850 [02:51<18:45,  3.75it/s]

Epoch: 1, Loss: 6.210653781890869


Processing epoch 00:  13%|█▎        | 634/4850 [02:51<18:39,  3.76it/s]

Epoch: 1, Loss: 6.125352382659912


Processing epoch 00:  13%|█▎        | 635/4850 [02:52<18:37,  3.77it/s]

Epoch: 1, Loss: 6.348127365112305


Processing epoch 00:  13%|█▎        | 636/4850 [02:52<18:39,  3.76it/s]

Epoch: 1, Loss: 6.3628950119018555


Processing epoch 00:  13%|█▎        | 637/4850 [02:52<18:34,  3.78it/s]

Epoch: 1, Loss: 6.425208568572998


Processing epoch 00:  13%|█▎        | 638/4850 [02:52<18:34,  3.78it/s]

Epoch: 1, Loss: 5.667815208435059


Processing epoch 00:  13%|█▎        | 639/4850 [02:53<18:40,  3.76it/s]

Epoch: 1, Loss: 5.336430549621582


Processing epoch 00:  13%|█▎        | 640/4850 [02:53<18:34,  3.78it/s]

Epoch: 1, Loss: 6.507926940917969


Processing epoch 00:  13%|█▎        | 641/4850 [02:53<18:34,  3.78it/s]

Epoch: 1, Loss: 6.406643867492676


Processing epoch 00:  13%|█▎        | 642/4850 [02:53<18:46,  3.74it/s]

Epoch: 1, Loss: 5.508322715759277


Processing epoch 00:  13%|█▎        | 643/4850 [02:54<18:44,  3.74it/s]

Epoch: 1, Loss: 6.275038719177246


Processing epoch 00:  13%|█▎        | 644/4850 [02:54<18:43,  3.74it/s]

Epoch: 1, Loss: 6.085202217102051


Processing epoch 00:  13%|█▎        | 645/4850 [02:54<18:39,  3.76it/s]

Epoch: 1, Loss: 6.254434585571289


Processing epoch 00:  13%|█▎        | 646/4850 [02:54<18:39,  3.76it/s]

Epoch: 1, Loss: 6.303253173828125


Processing epoch 00:  13%|█▎        | 647/4850 [02:55<18:38,  3.76it/s]

Epoch: 1, Loss: 6.505863666534424


Processing epoch 00:  13%|█▎        | 648/4850 [02:55<18:37,  3.76it/s]

Epoch: 1, Loss: 6.371150016784668


Processing epoch 00:  13%|█▎        | 649/4850 [02:55<18:34,  3.77it/s]

Epoch: 1, Loss: 6.7928314208984375


Processing epoch 00:  13%|█▎        | 650/4850 [02:56<18:37,  3.76it/s]

Epoch: 1, Loss: 6.1207451820373535


Processing epoch 00:  13%|█▎        | 651/4850 [02:56<18:38,  3.75it/s]

Epoch: 1, Loss: 6.14650297164917


Processing epoch 00:  13%|█▎        | 652/4850 [02:56<18:39,  3.75it/s]

Epoch: 1, Loss: 6.412924289703369


Processing epoch 00:  13%|█▎        | 653/4850 [02:56<18:49,  3.72it/s]

Epoch: 1, Loss: 6.016753673553467


Processing epoch 00:  13%|█▎        | 654/4850 [02:57<19:10,  3.65it/s]

Epoch: 1, Loss: 5.705892562866211


Processing epoch 00:  14%|█▎        | 655/4850 [02:57<19:22,  3.61it/s]

Epoch: 1, Loss: 6.117173194885254


Processing epoch 00:  14%|█▎        | 656/4850 [02:57<19:28,  3.59it/s]

Epoch: 1, Loss: 6.260951995849609


Processing epoch 00:  14%|█▎        | 657/4850 [02:57<19:31,  3.58it/s]

Epoch: 1, Loss: 6.4037766456604


Processing epoch 00:  14%|█▎        | 658/4850 [02:58<19:35,  3.57it/s]

Epoch: 1, Loss: 5.232573986053467


Processing epoch 00:  14%|█▎        | 659/4850 [02:58<19:43,  3.54it/s]

Epoch: 1, Loss: 6.043705940246582


Processing epoch 00:  14%|█▎        | 660/4850 [02:58<19:47,  3.53it/s]

Epoch: 1, Loss: 5.892780780792236


Processing epoch 00:  14%|█▎        | 661/4850 [02:59<20:00,  3.49it/s]

Epoch: 1, Loss: 5.9019927978515625


Processing epoch 00:  14%|█▎        | 662/4850 [02:59<19:54,  3.51it/s]

Epoch: 1, Loss: 6.341666221618652


Processing epoch 00:  14%|█▎        | 663/4850 [02:59<19:47,  3.53it/s]

Epoch: 1, Loss: 6.429452419281006


Processing epoch 00:  14%|█▎        | 664/4850 [02:59<19:36,  3.56it/s]

Epoch: 1, Loss: 5.784137725830078


Processing epoch 00:  14%|█▎        | 665/4850 [03:00<19:18,  3.61it/s]

Epoch: 1, Loss: 6.067327499389648


Processing epoch 00:  14%|█▎        | 666/4850 [03:00<19:11,  3.63it/s]

Epoch: 1, Loss: 5.786440372467041


Processing epoch 00:  14%|█▍        | 667/4850 [03:00<19:03,  3.66it/s]

Epoch: 1, Loss: 5.673285484313965


Processing epoch 00:  14%|█▍        | 668/4850 [03:01<19:01,  3.66it/s]

Epoch: 1, Loss: 6.294145584106445


Processing epoch 00:  14%|█▍        | 669/4850 [03:01<18:57,  3.67it/s]

Epoch: 1, Loss: 5.990184307098389


Processing epoch 00:  14%|█▍        | 670/4850 [03:01<18:51,  3.69it/s]

Epoch: 1, Loss: 6.198551177978516


Processing epoch 00:  14%|█▍        | 671/4850 [03:01<18:43,  3.72it/s]

Epoch: 1, Loss: 6.431776523590088


Processing epoch 00:  14%|█▍        | 672/4850 [03:02<18:38,  3.73it/s]

Epoch: 1, Loss: 5.947335243225098


Processing epoch 00:  14%|█▍        | 673/4850 [03:02<18:35,  3.74it/s]

Epoch: 1, Loss: 6.0583415031433105


Processing epoch 00:  14%|█▍        | 674/4850 [03:02<18:34,  3.75it/s]

Epoch: 1, Loss: 6.616268634796143


Processing epoch 00:  14%|█▍        | 675/4850 [03:02<18:37,  3.74it/s]

Epoch: 1, Loss: 5.689711570739746


Processing epoch 00:  14%|█▍        | 676/4850 [03:03<18:36,  3.74it/s]

Epoch: 1, Loss: 5.859222888946533


Processing epoch 00:  14%|█▍        | 677/4850 [03:03<18:41,  3.72it/s]

Epoch: 1, Loss: 5.352124214172363


Processing epoch 00:  14%|█▍        | 678/4850 [03:03<18:39,  3.73it/s]

Epoch: 1, Loss: 6.6239848136901855


Processing epoch 00:  14%|█▍        | 679/4850 [03:03<18:38,  3.73it/s]

Epoch: 1, Loss: 6.042186737060547


Processing epoch 00:  14%|█▍        | 680/4850 [03:04<19:02,  3.65it/s]

Epoch: 1, Loss: 6.34531831741333


Processing epoch 00:  14%|█▍        | 681/4850 [03:04<19:15,  3.61it/s]

Epoch: 1, Loss: 5.899648189544678


Processing epoch 00:  14%|█▍        | 682/4850 [03:04<19:24,  3.58it/s]

Epoch: 1, Loss: 6.013056755065918


Processing epoch 00:  14%|█▍        | 683/4850 [03:05<19:47,  3.51it/s]

Epoch: 1, Loss: 5.773685455322266


Processing epoch 00:  14%|█▍        | 684/4850 [03:05<19:49,  3.50it/s]

Epoch: 1, Loss: 6.062295436859131


Processing epoch 00:  14%|█▍        | 685/4850 [03:05<19:51,  3.50it/s]

Epoch: 1, Loss: 5.694936752319336


Processing epoch 00:  14%|█▍        | 686/4850 [03:05<19:54,  3.49it/s]

Epoch: 1, Loss: 5.253240585327148


Processing epoch 00:  14%|█▍        | 687/4850 [03:06<19:54,  3.49it/s]

Epoch: 1, Loss: 6.411655902862549


Processing epoch 00:  14%|█▍        | 688/4850 [03:06<19:53,  3.49it/s]

Epoch: 1, Loss: 6.551084995269775


Processing epoch 00:  14%|█▍        | 689/4850 [03:06<19:26,  3.57it/s]

Epoch: 1, Loss: 6.128872871398926


Processing epoch 00:  14%|█▍        | 690/4850 [03:07<19:08,  3.62it/s]

Epoch: 1, Loss: 6.094626426696777


Processing epoch 00:  14%|█▍        | 691/4850 [03:07<18:55,  3.66it/s]

Epoch: 1, Loss: 5.873183250427246


Processing epoch 00:  14%|█▍        | 692/4850 [03:07<18:47,  3.69it/s]

Epoch: 1, Loss: 5.485100746154785


Processing epoch 00:  14%|█▍        | 693/4850 [03:07<18:38,  3.72it/s]

Epoch: 1, Loss: 6.264166831970215


Processing epoch 00:  14%|█▍        | 694/4850 [03:08<18:40,  3.71it/s]

Epoch: 1, Loss: 5.80014705657959


Processing epoch 00:  14%|█▍        | 695/4850 [03:08<18:34,  3.73it/s]

Epoch: 1, Loss: 6.261040687561035


Processing epoch 00:  14%|█▍        | 696/4850 [03:08<18:28,  3.75it/s]

Epoch: 1, Loss: 6.024165153503418


Processing epoch 00:  14%|█▍        | 697/4850 [03:08<18:27,  3.75it/s]

Epoch: 1, Loss: 5.7416768074035645


Processing epoch 00:  14%|█▍        | 698/4850 [03:09<18:38,  3.71it/s]

Epoch: 1, Loss: 6.745516777038574


Processing epoch 00:  14%|█▍        | 699/4850 [03:09<18:35,  3.72it/s]

Epoch: 1, Loss: 6.383727073669434


Processing epoch 00:  14%|█▍        | 700/4850 [03:09<19:03,  3.63it/s]

Epoch: 1, Loss: 5.95649528503418


Processing epoch 00:  14%|█▍        | 701/4850 [03:10<19:16,  3.59it/s]

Epoch: 1, Loss: 6.031628608703613


Processing epoch 00:  14%|█▍        | 702/4850 [03:10<19:22,  3.57it/s]

Epoch: 1, Loss: 5.446193218231201


Processing epoch 00:  14%|█▍        | 703/4850 [03:10<19:26,  3.56it/s]

Epoch: 1, Loss: 5.470812797546387


Processing epoch 00:  15%|█▍        | 704/4850 [03:10<19:29,  3.55it/s]

Epoch: 1, Loss: 5.873570919036865


Processing epoch 00:  15%|█▍        | 705/4850 [03:11<19:36,  3.52it/s]

Epoch: 1, Loss: 5.945913314819336


Processing epoch 00:  15%|█▍        | 706/4850 [03:11<19:36,  3.52it/s]

Epoch: 1, Loss: 6.208678722381592


Processing epoch 00:  15%|█▍        | 707/4850 [03:11<19:39,  3.51it/s]

Epoch: 1, Loss: 5.176278114318848


Processing epoch 00:  15%|█▍        | 708/4850 [03:12<19:52,  3.47it/s]

Epoch: 1, Loss: 6.16996955871582


Processing epoch 00:  15%|█▍        | 709/4850 [03:12<19:50,  3.48it/s]

Epoch: 1, Loss: 4.668285369873047


Processing epoch 00:  15%|█▍        | 710/4850 [03:12<19:21,  3.56it/s]

Epoch: 1, Loss: 6.136198043823242


Processing epoch 00:  15%|█▍        | 711/4850 [03:12<19:04,  3.62it/s]

Epoch: 1, Loss: 5.996565818786621


Processing epoch 00:  15%|█▍        | 712/4850 [03:13<18:45,  3.68it/s]

Epoch: 1, Loss: 6.648280620574951


Processing epoch 00:  15%|█▍        | 713/4850 [03:13<18:38,  3.70it/s]

Epoch: 1, Loss: 6.266340255737305


Processing epoch 00:  15%|█▍        | 714/4850 [03:13<18:37,  3.70it/s]

Epoch: 1, Loss: 5.587380409240723


Processing epoch 00:  15%|█▍        | 715/4850 [03:13<18:33,  3.71it/s]

Epoch: 1, Loss: 5.752388954162598


Processing epoch 00:  15%|█▍        | 716/4850 [03:14<18:30,  3.72it/s]

Epoch: 1, Loss: 5.704208850860596


Processing epoch 00:  15%|█▍        | 717/4850 [03:14<18:28,  3.73it/s]

Epoch: 1, Loss: 6.033210754394531


Processing epoch 00:  15%|█▍        | 718/4850 [03:14<18:30,  3.72it/s]

Epoch: 1, Loss: 6.227128982543945


Processing epoch 00:  15%|█▍        | 719/4850 [03:15<18:38,  3.69it/s]

Epoch: 1, Loss: 6.483540058135986


Processing epoch 00:  15%|█▍        | 720/4850 [03:15<18:36,  3.70it/s]

Epoch: 1, Loss: 6.053262710571289


Processing epoch 00:  15%|█▍        | 721/4850 [03:15<18:33,  3.71it/s]

Epoch: 1, Loss: 5.598803997039795


Processing epoch 00:  15%|█▍        | 722/4850 [03:15<18:30,  3.72it/s]

Epoch: 1, Loss: 6.32462215423584


Processing epoch 00:  15%|█▍        | 723/4850 [03:16<18:29,  3.72it/s]

Epoch: 1, Loss: 5.264463901519775


Processing epoch 00:  15%|█▍        | 724/4850 [03:16<18:27,  3.73it/s]

Epoch: 1, Loss: 5.772608757019043


Processing epoch 00:  15%|█▍        | 725/4850 [03:16<18:20,  3.75it/s]

Epoch: 1, Loss: 6.172950744628906


Processing epoch 00:  15%|█▍        | 726/4850 [03:16<18:19,  3.75it/s]

Epoch: 1, Loss: 6.358369827270508


Processing epoch 00:  15%|█▍        | 727/4850 [03:17<18:19,  3.75it/s]

Epoch: 1, Loss: 6.353611946105957


Processing epoch 00:  15%|█▌        | 728/4850 [03:17<18:27,  3.72it/s]

Epoch: 1, Loss: 6.437073230743408


Processing epoch 00:  15%|█▌        | 729/4850 [03:17<18:26,  3.72it/s]

Epoch: 1, Loss: 5.404937744140625


Processing epoch 00:  15%|█▌        | 730/4850 [03:17<18:21,  3.74it/s]

Epoch: 1, Loss: 6.029134750366211


Processing epoch 00:  15%|█▌        | 731/4850 [03:18<18:31,  3.71it/s]

Epoch: 1, Loss: 5.932032108306885


Processing epoch 00:  15%|█▌        | 732/4850 [03:18<18:40,  3.68it/s]

Epoch: 1, Loss: 6.178983688354492


Processing epoch 00:  15%|█▌        | 733/4850 [03:18<18:36,  3.69it/s]

Epoch: 1, Loss: 5.709138870239258


Processing epoch 00:  15%|█▌        | 734/4850 [03:19<18:31,  3.70it/s]

Epoch: 1, Loss: 6.305192947387695


Processing epoch 00:  15%|█▌        | 735/4850 [03:19<18:32,  3.70it/s]

Epoch: 1, Loss: 5.980301380157471


Processing epoch 00:  15%|█▌        | 736/4850 [03:19<18:22,  3.73it/s]

Epoch: 1, Loss: 6.523417949676514


Processing epoch 00:  15%|█▌        | 737/4850 [03:19<18:24,  3.73it/s]

Epoch: 1, Loss: 5.711699962615967


Processing epoch 00:  15%|█▌        | 738/4850 [03:20<18:27,  3.71it/s]

Epoch: 1, Loss: 6.112654209136963


Processing epoch 00:  15%|█▌        | 739/4850 [03:20<18:27,  3.71it/s]

Epoch: 1, Loss: 6.16099739074707


Processing epoch 00:  15%|█▌        | 740/4850 [03:20<18:22,  3.73it/s]

Epoch: 1, Loss: 6.5046234130859375


Processing epoch 00:  15%|█▌        | 741/4850 [03:20<18:16,  3.75it/s]

Epoch: 1, Loss: 6.489518165588379


Processing epoch 00:  15%|█▌        | 742/4850 [03:21<18:13,  3.76it/s]

Epoch: 1, Loss: 5.915688514709473


Processing epoch 00:  15%|█▌        | 743/4850 [03:21<18:12,  3.76it/s]

Epoch: 1, Loss: 5.733116626739502


Processing epoch 00:  15%|█▌        | 744/4850 [03:21<18:12,  3.76it/s]

Epoch: 1, Loss: 5.43527364730835


Processing epoch 00:  15%|█▌        | 745/4850 [03:22<18:12,  3.76it/s]

Epoch: 1, Loss: 5.231462001800537


Processing epoch 00:  15%|█▌        | 746/4850 [03:22<18:15,  3.75it/s]

Epoch: 1, Loss: 5.492738246917725


Processing epoch 00:  15%|█▌        | 747/4850 [03:22<18:45,  3.65it/s]

Epoch: 1, Loss: 6.0053791999816895


Processing epoch 00:  15%|█▌        | 748/4850 [03:22<18:51,  3.63it/s]

Epoch: 1, Loss: 5.836516380310059


Processing epoch 00:  15%|█▌        | 749/4850 [03:23<18:59,  3.60it/s]

Epoch: 1, Loss: 5.698110580444336


Processing epoch 00:  15%|█▌        | 750/4850 [03:23<18:54,  3.61it/s]

Epoch: 1, Loss: 6.619357109069824


Processing epoch 00:  15%|█▌        | 751/4850 [03:23<19:09,  3.57it/s]

Epoch: 1, Loss: 5.705069541931152


Processing epoch 00:  16%|█▌        | 752/4850 [03:23<19:17,  3.54it/s]

Epoch: 1, Loss: 5.723758220672607


Processing epoch 00:  16%|█▌        | 753/4850 [03:24<19:23,  3.52it/s]

Epoch: 1, Loss: 6.43971061706543


Processing epoch 00:  16%|█▌        | 754/4850 [03:24<19:25,  3.51it/s]

Epoch: 1, Loss: 5.896166801452637


Processing epoch 00:  16%|█▌        | 755/4850 [03:24<19:43,  3.46it/s]

Epoch: 1, Loss: 5.308977127075195


Processing epoch 00:  16%|█▌        | 756/4850 [03:25<19:14,  3.55it/s]

Epoch: 1, Loss: 6.132684707641602


Processing epoch 00:  16%|█▌        | 757/4850 [03:25<18:57,  3.60it/s]

Epoch: 1, Loss: 5.774305820465088


Processing epoch 00:  16%|█▌        | 758/4850 [03:25<18:41,  3.65it/s]

Epoch: 1, Loss: 6.110367774963379


Processing epoch 00:  16%|█▌        | 759/4850 [03:25<18:30,  3.68it/s]

Epoch: 1, Loss: 5.618971824645996


Processing epoch 00:  16%|█▌        | 760/4850 [03:26<18:29,  3.68it/s]

Epoch: 1, Loss: 5.745286464691162


Processing epoch 00:  16%|█▌        | 761/4850 [03:26<18:24,  3.70it/s]

Epoch: 1, Loss: 6.610698223114014


Processing epoch 00:  16%|█▌        | 762/4850 [03:26<18:22,  3.71it/s]

Epoch: 1, Loss: 6.0302653312683105


Processing epoch 00:  16%|█▌        | 763/4850 [03:27<18:15,  3.73it/s]

Epoch: 1, Loss: 6.127655506134033


Processing epoch 00:  16%|█▌        | 764/4850 [03:27<18:19,  3.72it/s]

Epoch: 1, Loss: 5.9069504737854


Processing epoch 00:  16%|█▌        | 765/4850 [03:27<18:22,  3.71it/s]

Epoch: 1, Loss: 5.340927600860596


Processing epoch 00:  16%|█▌        | 766/4850 [03:27<18:26,  3.69it/s]

Epoch: 1, Loss: 6.412545680999756


Processing epoch 00:  16%|█▌        | 767/4850 [03:28<18:24,  3.70it/s]

Epoch: 1, Loss: 5.937623977661133


Processing epoch 00:  16%|█▌        | 768/4850 [03:28<18:20,  3.71it/s]

Epoch: 1, Loss: 6.288593292236328


Processing epoch 00:  16%|█▌        | 769/4850 [03:28<18:14,  3.73it/s]

Epoch: 1, Loss: 5.934539318084717


Processing epoch 00:  16%|█▌        | 770/4850 [03:28<18:12,  3.73it/s]

Epoch: 1, Loss: 5.995781421661377


Processing epoch 00:  16%|█▌        | 771/4850 [03:29<18:10,  3.74it/s]

Epoch: 1, Loss: 6.522665023803711


Processing epoch 00:  16%|█▌        | 772/4850 [03:29<18:12,  3.73it/s]

Epoch: 1, Loss: 5.811290740966797


Processing epoch 00:  16%|█▌        | 773/4850 [03:29<18:14,  3.73it/s]

Epoch: 1, Loss: 4.834379196166992


Processing epoch 00:  16%|█▌        | 774/4850 [03:29<18:12,  3.73it/s]

Epoch: 1, Loss: 5.242451190948486


Processing epoch 00:  16%|█▌        | 775/4850 [03:30<18:13,  3.73it/s]

Epoch: 1, Loss: 4.746759414672852


Processing epoch 00:  16%|█▌        | 776/4850 [03:30<18:17,  3.71it/s]

Epoch: 1, Loss: 5.4190473556518555


Processing epoch 00:  16%|█▌        | 777/4850 [03:30<18:12,  3.73it/s]

Epoch: 1, Loss: 5.540923118591309


Processing epoch 00:  16%|█▌        | 778/4850 [03:31<18:14,  3.72it/s]

Epoch: 1, Loss: 5.607327461242676


Processing epoch 00:  16%|█▌        | 779/4850 [03:31<18:14,  3.72it/s]

Epoch: 1, Loss: 5.922813415527344


Processing epoch 00:  16%|█▌        | 780/4850 [03:31<18:14,  3.72it/s]

Epoch: 1, Loss: 6.198308944702148


Processing epoch 00:  16%|█▌        | 781/4850 [03:31<18:09,  3.73it/s]

Epoch: 1, Loss: 6.103690147399902


Processing epoch 00:  16%|█▌        | 782/4850 [03:32<18:06,  3.75it/s]

Epoch: 1, Loss: 5.773867607116699


Processing epoch 00:  16%|█▌        | 783/4850 [03:32<18:02,  3.76it/s]

Epoch: 1, Loss: 5.650754928588867


Processing epoch 00:  16%|█▌        | 784/4850 [03:32<18:08,  3.74it/s]

Epoch: 1, Loss: 5.497598171234131


Processing epoch 00:  16%|█▌        | 785/4850 [03:32<18:16,  3.71it/s]

Epoch: 1, Loss: 5.488642692565918


Processing epoch 00:  16%|█▌        | 786/4850 [03:33<18:14,  3.71it/s]

Epoch: 1, Loss: 5.51539421081543


Processing epoch 00:  16%|█▌        | 787/4850 [03:33<18:09,  3.73it/s]

Epoch: 1, Loss: 5.691097259521484


Processing epoch 00:  16%|█▌        | 788/4850 [03:33<18:11,  3.72it/s]

Epoch: 1, Loss: 5.209531784057617


Processing epoch 00:  16%|█▋        | 789/4850 [03:33<18:11,  3.72it/s]

Epoch: 1, Loss: 5.923221111297607


Processing epoch 00:  16%|█▋        | 790/4850 [03:34<18:13,  3.71it/s]

Epoch: 1, Loss: 5.129703521728516


Processing epoch 00:  16%|█▋        | 791/4850 [03:34<18:14,  3.71it/s]

Epoch: 1, Loss: 5.466774940490723


Processing epoch 00:  16%|█▋        | 792/4850 [03:34<18:11,  3.72it/s]

Epoch: 1, Loss: 5.581324100494385


Processing epoch 00:  16%|█▋        | 793/4850 [03:35<18:27,  3.66it/s]

Epoch: 1, Loss: 6.248805999755859


Processing epoch 00:  16%|█▋        | 794/4850 [03:35<18:41,  3.62it/s]

Epoch: 1, Loss: 5.798006057739258


Processing epoch 00:  16%|█▋        | 795/4850 [03:35<18:49,  3.59it/s]

Epoch: 1, Loss: 5.588165760040283


Processing epoch 00:  16%|█▋        | 796/4850 [03:35<18:58,  3.56it/s]

Epoch: 1, Loss: 5.992162704467773


Processing epoch 00:  16%|█▋        | 797/4850 [03:36<19:07,  3.53it/s]

Epoch: 1, Loss: 5.314023017883301


Processing epoch 00:  16%|█▋        | 798/4850 [03:36<19:26,  3.47it/s]

Epoch: 1, Loss: 5.6634979248046875


Processing epoch 00:  16%|█▋        | 799/4850 [03:36<19:35,  3.45it/s]

Epoch: 1, Loss: 5.848188400268555


Processing epoch 00:  16%|█▋        | 800/4850 [03:37<19:46,  3.41it/s]

Epoch: 1, Loss: 4.648588180541992


Processing epoch 00:  17%|█▋        | 801/4850 [03:37<19:34,  3.45it/s]

Epoch: 1, Loss: 5.742588520050049


Processing epoch 00:  17%|█▋        | 802/4850 [03:37<19:10,  3.52it/s]

Epoch: 1, Loss: 5.643729209899902


Processing epoch 00:  17%|█▋        | 803/4850 [03:37<18:47,  3.59it/s]

Epoch: 1, Loss: 6.086419582366943


Processing epoch 00:  17%|█▋        | 804/4850 [03:38<18:36,  3.63it/s]

Epoch: 1, Loss: 5.73992395401001


Processing epoch 00:  17%|█▋        | 805/4850 [03:38<18:19,  3.68it/s]

Epoch: 1, Loss: 6.525379657745361


Processing epoch 00:  17%|█▋        | 806/4850 [03:38<18:13,  3.70it/s]

Epoch: 1, Loss: 5.276035308837891


Processing epoch 00:  17%|█▋        | 807/4850 [03:39<18:10,  3.71it/s]

Epoch: 1, Loss: 5.181004524230957


Processing epoch 00:  17%|█▋        | 808/4850 [03:39<18:08,  3.71it/s]

Epoch: 1, Loss: 5.721010684967041


Processing epoch 00:  17%|█▋        | 809/4850 [03:39<18:10,  3.71it/s]

Epoch: 1, Loss: 5.242812156677246


Processing epoch 00:  17%|█▋        | 810/4850 [03:39<18:09,  3.71it/s]

Epoch: 1, Loss: 5.380908012390137


Processing epoch 00:  17%|█▋        | 811/4850 [03:40<18:07,  3.72it/s]

Epoch: 1, Loss: 5.828301429748535


Processing epoch 00:  17%|█▋        | 812/4850 [03:40<18:05,  3.72it/s]

Epoch: 1, Loss: 5.620200157165527


Processing epoch 00:  17%|█▋        | 813/4850 [03:40<18:03,  3.72it/s]

Epoch: 1, Loss: 6.358025550842285


Processing epoch 00:  17%|█▋        | 814/4850 [03:40<18:00,  3.74it/s]

Epoch: 1, Loss: 5.462779998779297


Processing epoch 00:  17%|█▋        | 815/4850 [03:41<18:05,  3.72it/s]

Epoch: 1, Loss: 6.118653297424316


Processing epoch 00:  17%|█▋        | 816/4850 [03:41<18:05,  3.72it/s]

Epoch: 1, Loss: 5.59197998046875


Processing epoch 00:  17%|█▋        | 817/4850 [03:41<18:02,  3.73it/s]

Epoch: 1, Loss: 5.749825477600098


Processing epoch 00:  17%|█▋        | 818/4850 [03:41<18:04,  3.72it/s]

Epoch: 1, Loss: 5.085671901702881


Processing epoch 00:  17%|█▋        | 819/4850 [03:42<18:15,  3.68it/s]

Epoch: 1, Loss: 5.376682758331299


Processing epoch 00:  17%|█▋        | 820/4850 [03:42<18:06,  3.71it/s]

Epoch: 1, Loss: 5.736278057098389


Processing epoch 00:  17%|█▋        | 821/4850 [03:42<18:03,  3.72it/s]

Epoch: 1, Loss: 5.95500373840332


Processing epoch 00:  17%|█▋        | 822/4850 [03:43<18:04,  3.71it/s]

Epoch: 1, Loss: 5.698660850524902


Processing epoch 00:  17%|█▋        | 823/4850 [03:43<18:03,  3.72it/s]

Epoch: 1, Loss: 5.655964374542236


Processing epoch 00:  17%|█▋        | 824/4850 [03:43<18:05,  3.71it/s]

Epoch: 1, Loss: 5.705010414123535


Processing epoch 00:  17%|█▋        | 825/4850 [03:43<18:05,  3.71it/s]

Epoch: 1, Loss: 5.48532772064209


Processing epoch 00:  17%|█▋        | 826/4850 [03:44<18:03,  3.71it/s]

Epoch: 1, Loss: 5.527039051055908


Processing epoch 00:  17%|█▋        | 827/4850 [03:44<18:06,  3.70it/s]

Epoch: 1, Loss: 5.132894992828369


Processing epoch 00:  17%|█▋        | 828/4850 [03:44<17:59,  3.73it/s]

Epoch: 1, Loss: 5.9668169021606445


Processing epoch 00:  17%|█▋        | 829/4850 [03:44<18:05,  3.71it/s]

Epoch: 1, Loss: 4.9695844650268555


Processing epoch 00:  17%|█▋        | 830/4850 [03:45<18:05,  3.70it/s]

Epoch: 1, Loss: 5.791064262390137


Processing epoch 00:  17%|█▋        | 831/4850 [03:45<18:00,  3.72it/s]

Epoch: 1, Loss: 5.969286918640137


Processing epoch 00:  17%|█▋        | 832/4850 [03:45<17:55,  3.73it/s]

Epoch: 1, Loss: 5.843389987945557


Processing epoch 00:  17%|█▋        | 833/4850 [03:45<17:56,  3.73it/s]

Epoch: 1, Loss: 5.059661388397217


Processing epoch 00:  17%|█▋        | 834/4850 [03:46<18:01,  3.71it/s]

Epoch: 1, Loss: 5.918044090270996


Processing epoch 00:  17%|█▋        | 835/4850 [03:46<17:57,  3.73it/s]

Epoch: 1, Loss: 6.158054828643799


Processing epoch 00:  17%|█▋        | 836/4850 [03:46<17:55,  3.73it/s]

Epoch: 1, Loss: 5.199983596801758


Processing epoch 00:  17%|█▋        | 837/4850 [03:47<17:54,  3.73it/s]

Epoch: 1, Loss: 5.351099014282227


Processing epoch 00:  17%|█▋        | 838/4850 [03:47<18:04,  3.70it/s]

Epoch: 1, Loss: 5.120318412780762


Processing epoch 00:  17%|█▋        | 839/4850 [03:47<18:19,  3.65it/s]

Epoch: 1, Loss: 5.510780334472656


Processing epoch 00:  17%|█▋        | 840/4850 [03:47<18:34,  3.60it/s]

Epoch: 1, Loss: 5.333611965179443


Processing epoch 00:  17%|█▋        | 841/4850 [03:48<18:45,  3.56it/s]

Epoch: 1, Loss: 4.0145368576049805


Processing epoch 00:  17%|█▋        | 842/4850 [03:48<18:56,  3.53it/s]

Epoch: 1, Loss: 5.865253925323486


Processing epoch 00:  17%|█▋        | 843/4850 [03:48<18:52,  3.54it/s]

Epoch: 1, Loss: 5.536722660064697


Processing epoch 00:  17%|█▋        | 844/4850 [03:49<18:43,  3.57it/s]

Epoch: 1, Loss: 6.686062812805176


Processing epoch 00:  17%|█▋        | 845/4850 [03:49<18:54,  3.53it/s]

Epoch: 1, Loss: 5.266258239746094


Processing epoch 00:  17%|█▋        | 846/4850 [03:49<18:58,  3.52it/s]

Epoch: 1, Loss: 5.670724391937256


Processing epoch 00:  17%|█▋        | 847/4850 [03:49<19:07,  3.49it/s]

Epoch: 1, Loss: 5.67125129699707


Processing epoch 00:  17%|█▋        | 848/4850 [03:50<19:17,  3.46it/s]

Epoch: 1, Loss: 5.608412265777588


Processing epoch 00:  18%|█▊        | 849/4850 [03:50<18:56,  3.52it/s]

Epoch: 1, Loss: 5.017451286315918


Processing epoch 00:  18%|█▊        | 850/4850 [03:50<18:36,  3.58it/s]

Epoch: 1, Loss: 4.769345283508301


Processing epoch 00:  18%|█▊        | 851/4850 [03:51<18:14,  3.65it/s]

Epoch: 1, Loss: 6.535656929016113


Processing epoch 00:  18%|█▊        | 852/4850 [03:51<18:04,  3.69it/s]

Epoch: 1, Loss: 5.699786186218262


Processing epoch 00:  18%|█▊        | 853/4850 [03:51<17:59,  3.70it/s]

Epoch: 1, Loss: 6.193509578704834


Processing epoch 00:  18%|█▊        | 854/4850 [03:51<17:50,  3.73it/s]

Epoch: 1, Loss: 5.827102184295654


Processing epoch 00:  18%|█▊        | 855/4850 [03:52<17:52,  3.73it/s]

Epoch: 1, Loss: 6.019071578979492


Processing epoch 00:  18%|█▊        | 856/4850 [03:52<17:52,  3.73it/s]

Epoch: 1, Loss: 5.778902053833008


Processing epoch 00:  18%|█▊        | 857/4850 [03:52<17:48,  3.74it/s]

Epoch: 1, Loss: 6.3928022384643555


Processing epoch 00:  18%|█▊        | 858/4850 [03:52<17:42,  3.76it/s]

Epoch: 1, Loss: 5.916879653930664


Processing epoch 00:  18%|█▊        | 859/4850 [03:53<17:39,  3.77it/s]

Epoch: 1, Loss: 5.889983177185059


Processing epoch 00:  18%|█▊        | 860/4850 [03:53<17:43,  3.75it/s]

Epoch: 1, Loss: 5.890162944793701


Processing epoch 00:  18%|█▊        | 861/4850 [03:53<17:44,  3.75it/s]

Epoch: 1, Loss: 5.858120918273926


Processing epoch 00:  18%|█▊        | 862/4850 [03:53<17:47,  3.73it/s]

Epoch: 1, Loss: 5.407469272613525


Processing epoch 00:  18%|█▊        | 863/4850 [03:54<17:50,  3.73it/s]

Epoch: 1, Loss: 5.740012168884277


Processing epoch 00:  18%|█▊        | 864/4850 [03:54<17:47,  3.73it/s]

Epoch: 1, Loss: 4.925723552703857


Processing epoch 00:  18%|█▊        | 865/4850 [03:54<17:45,  3.74it/s]

Epoch: 1, Loss: 5.634541988372803


Processing epoch 00:  18%|█▊        | 866/4850 [03:55<17:45,  3.74it/s]

Epoch: 1, Loss: 6.150439262390137


Processing epoch 00:  18%|█▊        | 867/4850 [03:55<17:47,  3.73it/s]

Epoch: 1, Loss: 5.841507434844971


Processing epoch 00:  18%|█▊        | 868/4850 [03:55<17:46,  3.73it/s]

Epoch: 1, Loss: 5.492032051086426


Processing epoch 00:  18%|█▊        | 869/4850 [03:55<17:44,  3.74it/s]

Epoch: 1, Loss: 5.4451375007629395


Processing epoch 00:  18%|█▊        | 870/4850 [03:56<17:50,  3.72it/s]

Epoch: 1, Loss: 5.924757957458496


Processing epoch 00:  18%|█▊        | 871/4850 [03:56<17:52,  3.71it/s]

Epoch: 1, Loss: 5.322592735290527


Processing epoch 00:  18%|█▊        | 872/4850 [03:56<18:00,  3.68it/s]

Epoch: 1, Loss: 5.790154457092285


Processing epoch 00:  18%|█▊        | 873/4850 [03:56<17:54,  3.70it/s]

Epoch: 1, Loss: 5.65825080871582


Processing epoch 00:  18%|█▊        | 874/4850 [03:57<17:51,  3.71it/s]

Epoch: 1, Loss: 6.110666275024414


Processing epoch 00:  18%|█▊        | 875/4850 [03:57<17:56,  3.69it/s]

Epoch: 1, Loss: 5.405231475830078


Processing epoch 00:  18%|█▊        | 876/4850 [03:57<17:52,  3.71it/s]

Epoch: 1, Loss: 5.850976943969727


Processing epoch 00:  18%|█▊        | 877/4850 [03:57<17:52,  3.70it/s]

Epoch: 1, Loss: 5.222075939178467


Processing epoch 00:  18%|█▊        | 878/4850 [03:58<17:52,  3.70it/s]

Epoch: 1, Loss: 6.168615818023682


Processing epoch 00:  18%|█▊        | 879/4850 [03:58<17:48,  3.72it/s]

Epoch: 1, Loss: 5.591073989868164


Processing epoch 00:  18%|█▊        | 880/4850 [03:58<17:48,  3.72it/s]

Epoch: 1, Loss: 5.282825946807861


Processing epoch 00:  18%|█▊        | 881/4850 [03:59<17:40,  3.74it/s]

Epoch: 1, Loss: 6.50860071182251


Processing epoch 00:  18%|█▊        | 882/4850 [03:59<17:47,  3.72it/s]

Epoch: 1, Loss: 5.105618476867676


Processing epoch 00:  18%|█▊        | 883/4850 [03:59<17:45,  3.72it/s]

Epoch: 1, Loss: 5.741323471069336


Processing epoch 00:  18%|█▊        | 884/4850 [03:59<17:40,  3.74it/s]

Epoch: 1, Loss: 5.620522499084473


Processing epoch 00:  18%|█▊        | 885/4850 [04:00<17:39,  3.74it/s]

Epoch: 1, Loss: 5.235147476196289


Processing epoch 00:  18%|█▊        | 886/4850 [04:00<17:54,  3.69it/s]

Epoch: 1, Loss: 5.53040885925293


Processing epoch 00:  18%|█▊        | 887/4850 [04:00<18:12,  3.63it/s]

Epoch: 1, Loss: 5.753601551055908


Processing epoch 00:  18%|█▊        | 888/4850 [04:00<18:29,  3.57it/s]

Epoch: 1, Loss: 5.439456939697266


Processing epoch 00:  18%|█▊        | 889/4850 [04:01<18:42,  3.53it/s]

Epoch: 1, Loss: 5.7383623123168945


Processing epoch 00:  18%|█▊        | 890/4850 [04:01<18:38,  3.54it/s]

Epoch: 1, Loss: 5.463970184326172


Processing epoch 00:  18%|█▊        | 891/4850 [04:01<18:53,  3.49it/s]

Epoch: 1, Loss: 5.210742950439453


Processing epoch 00:  18%|█▊        | 892/4850 [04:02<18:48,  3.51it/s]

Epoch: 1, Loss: 5.698702335357666


Processing epoch 00:  18%|█▊        | 893/4850 [04:02<18:53,  3.49it/s]

Epoch: 1, Loss: 5.092635631561279


Processing epoch 00:  18%|█▊        | 894/4850 [04:02<18:48,  3.51it/s]

Epoch: 1, Loss: 5.56351900100708


Processing epoch 00:  18%|█▊        | 895/4850 [04:02<18:46,  3.51it/s]

Epoch: 1, Loss: 6.221284866333008


Processing epoch 00:  18%|█▊        | 896/4850 [04:03<18:27,  3.57it/s]

Epoch: 1, Loss: 5.1829352378845215


Processing epoch 00:  18%|█▊        | 897/4850 [04:03<18:12,  3.62it/s]

Epoch: 1, Loss: 5.899400234222412


Processing epoch 00:  19%|█▊        | 898/4850 [04:03<17:58,  3.66it/s]

Epoch: 1, Loss: 5.612288475036621


Processing epoch 00:  19%|█▊        | 899/4850 [04:04<17:51,  3.69it/s]

Epoch: 1, Loss: 5.055704116821289


Processing epoch 00:  19%|█▊        | 900/4850 [04:04<17:49,  3.69it/s]

Epoch: 1, Loss: 5.045432090759277


Processing epoch 00:  19%|█▊        | 901/4850 [04:04<17:46,  3.70it/s]

Epoch: 1, Loss: 5.737122535705566


Processing epoch 00:  19%|█▊        | 902/4850 [04:04<17:41,  3.72it/s]

Epoch: 1, Loss: 5.589189052581787


Processing epoch 00:  19%|█▊        | 903/4850 [04:05<17:41,  3.72it/s]

Epoch: 1, Loss: 4.524813652038574


Processing epoch 00:  19%|█▊        | 904/4850 [04:05<17:34,  3.74it/s]

Epoch: 1, Loss: 5.468149185180664


Processing epoch 00:  19%|█▊        | 905/4850 [04:05<17:32,  3.75it/s]

Epoch: 1, Loss: 5.580873012542725


Processing epoch 00:  19%|█▊        | 906/4850 [04:05<17:41,  3.71it/s]

Epoch: 1, Loss: 5.558614253997803


Processing epoch 00:  19%|█▊        | 907/4850 [04:06<17:37,  3.73it/s]

Epoch: 1, Loss: 5.363742828369141


Processing epoch 00:  19%|█▊        | 908/4850 [04:06<17:33,  3.74it/s]

Epoch: 1, Loss: 6.190698623657227


Processing epoch 00:  19%|█▊        | 909/4850 [04:06<17:30,  3.75it/s]

Epoch: 1, Loss: 5.8283610343933105


Processing epoch 00:  19%|█▉        | 910/4850 [04:06<17:28,  3.76it/s]

Epoch: 1, Loss: 4.913513660430908


Processing epoch 00:  19%|█▉        | 911/4850 [04:07<17:25,  3.77it/s]

Epoch: 1, Loss: 5.74621057510376


Processing epoch 00:  19%|█▉        | 912/4850 [04:07<17:26,  3.76it/s]

Epoch: 1, Loss: 5.771766185760498


Processing epoch 00:  19%|█▉        | 913/4850 [04:07<17:29,  3.75it/s]

Epoch: 1, Loss: 5.472326278686523


Processing epoch 00:  19%|█▉        | 914/4850 [04:08<17:37,  3.72it/s]

Epoch: 1, Loss: 5.179531097412109


Processing epoch 00:  19%|█▉        | 915/4850 [04:08<17:36,  3.73it/s]

Epoch: 1, Loss: 6.032177925109863


Processing epoch 00:  19%|█▉        | 916/4850 [04:08<17:37,  3.72it/s]

Epoch: 1, Loss: 5.20382833480835


Processing epoch 00:  19%|█▉        | 917/4850 [04:08<17:34,  3.73it/s]

Epoch: 1, Loss: 6.096515655517578


Processing epoch 00:  19%|█▉        | 918/4850 [04:09<17:36,  3.72it/s]

Epoch: 1, Loss: 5.395070552825928


Processing epoch 00:  19%|█▉        | 919/4850 [04:09<17:34,  3.73it/s]

Epoch: 1, Loss: 5.3912482261657715


Processing epoch 00:  19%|█▉        | 920/4850 [04:09<17:37,  3.72it/s]

Epoch: 1, Loss: 4.885776519775391


Processing epoch 00:  19%|█▉        | 921/4850 [04:09<17:34,  3.73it/s]

Epoch: 1, Loss: 5.783566474914551


Processing epoch 00:  19%|█▉        | 922/4850 [04:10<17:36,  3.72it/s]

Epoch: 1, Loss: 5.615015506744385


Processing epoch 00:  19%|█▉        | 923/4850 [04:10<17:33,  3.73it/s]

Epoch: 1, Loss: 5.596811294555664


Processing epoch 00:  19%|█▉        | 924/4850 [04:10<17:33,  3.73it/s]

Epoch: 1, Loss: 4.890047550201416


Processing epoch 00:  19%|█▉        | 925/4850 [04:11<17:38,  3.71it/s]

Epoch: 1, Loss: 5.620993614196777


Processing epoch 00:  19%|█▉        | 926/4850 [04:11<17:39,  3.70it/s]

Epoch: 1, Loss: 5.055164813995361


Processing epoch 00:  19%|█▉        | 927/4850 [04:11<17:42,  3.69it/s]

Epoch: 1, Loss: 5.685775279998779


Processing epoch 00:  19%|█▉        | 928/4850 [04:11<17:39,  3.70it/s]

Epoch: 1, Loss: 5.542205810546875


Processing epoch 00:  19%|█▉        | 929/4850 [04:12<17:45,  3.68it/s]

Epoch: 1, Loss: 6.339532375335693


Processing epoch 00:  19%|█▉        | 930/4850 [04:12<17:39,  3.70it/s]

Epoch: 1, Loss: 4.965056896209717


Processing epoch 00:  19%|█▉        | 931/4850 [04:12<17:33,  3.72it/s]

Epoch: 1, Loss: 5.082196235656738


Processing epoch 00:  19%|█▉        | 932/4850 [04:12<17:31,  3.73it/s]

Epoch: 1, Loss: 5.3047003746032715


Processing epoch 00:  19%|█▉        | 933/4850 [04:13<17:59,  3.63it/s]

Epoch: 1, Loss: 5.399692058563232


Processing epoch 00:  19%|█▉        | 934/4850 [04:13<18:10,  3.59it/s]

Epoch: 1, Loss: 5.667360782623291


Processing epoch 00:  19%|█▉        | 935/4850 [04:13<18:17,  3.57it/s]

Epoch: 1, Loss: 5.512825012207031


Processing epoch 00:  19%|█▉        | 936/4850 [04:14<18:26,  3.54it/s]

Epoch: 1, Loss: 5.5802412033081055


Processing epoch 00:  19%|█▉        | 937/4850 [04:14<18:40,  3.49it/s]

Epoch: 1, Loss: 5.783975601196289


Processing epoch 00:  19%|█▉        | 938/4850 [04:14<18:39,  3.50it/s]

Epoch: 1, Loss: 5.484397888183594


Processing epoch 00:  19%|█▉        | 939/4850 [04:14<18:40,  3.49it/s]

Epoch: 1, Loss: 5.117009162902832


Processing epoch 00:  19%|█▉        | 940/4850 [04:15<18:55,  3.44it/s]

Epoch: 1, Loss: 6.383446216583252


Processing epoch 00:  19%|█▉        | 941/4850 [04:15<18:55,  3.44it/s]

Epoch: 1, Loss: 4.852642059326172


Processing epoch 00:  19%|█▉        | 942/4850 [04:15<19:01,  3.42it/s]

Epoch: 1, Loss: 5.434044361114502


Processing epoch 00:  19%|█▉        | 943/4850 [04:16<18:31,  3.51it/s]

Epoch: 1, Loss: 4.910292625427246


Processing epoch 00:  19%|█▉        | 944/4850 [04:16<18:08,  3.59it/s]

Epoch: 1, Loss: 5.878264904022217


Processing epoch 00:  19%|█▉        | 945/4850 [04:16<17:56,  3.63it/s]

Epoch: 1, Loss: 5.182882785797119


Processing epoch 00:  20%|█▉        | 946/4850 [04:16<17:46,  3.66it/s]

Epoch: 1, Loss: 5.6681294441223145


Processing epoch 00:  20%|█▉        | 947/4850 [04:17<17:37,  3.69it/s]

Epoch: 1, Loss: 5.8152546882629395


Processing epoch 00:  20%|█▉        | 948/4850 [04:17<17:33,  3.70it/s]

Epoch: 1, Loss: 5.772843360900879


Processing epoch 00:  20%|█▉        | 949/4850 [04:17<17:29,  3.72it/s]

Epoch: 1, Loss: 5.276625156402588


Processing epoch 00:  20%|█▉        | 950/4850 [04:17<17:24,  3.73it/s]

Epoch: 1, Loss: 5.770999908447266


Processing epoch 00:  20%|█▉        | 951/4850 [04:18<17:25,  3.73it/s]

Epoch: 1, Loss: 5.063335418701172


Processing epoch 00:  20%|█▉        | 952/4850 [04:18<17:25,  3.73it/s]

Epoch: 1, Loss: 5.663214683532715


Processing epoch 00:  20%|█▉        | 953/4850 [04:18<17:35,  3.69it/s]

Epoch: 1, Loss: 5.742908477783203


Processing epoch 00:  20%|█▉        | 954/4850 [04:19<17:33,  3.70it/s]

Epoch: 1, Loss: 5.300419807434082


Processing epoch 00:  20%|█▉        | 955/4850 [04:19<17:38,  3.68it/s]

Epoch: 1, Loss: 4.8834614753723145


Processing epoch 00:  20%|█▉        | 956/4850 [04:19<17:33,  3.70it/s]

Epoch: 1, Loss: 5.182389736175537


Processing epoch 00:  20%|█▉        | 957/4850 [04:19<17:27,  3.72it/s]

Epoch: 1, Loss: 5.728941440582275


Processing epoch 00:  20%|█▉        | 958/4850 [04:20<17:32,  3.70it/s]

Epoch: 1, Loss: 4.833120822906494


Processing epoch 00:  20%|█▉        | 959/4850 [04:20<17:36,  3.68it/s]

Epoch: 1, Loss: 4.424875736236572


Processing epoch 00:  20%|█▉        | 960/4850 [04:20<17:33,  3.69it/s]

Epoch: 1, Loss: 4.646152496337891


Processing epoch 00:  20%|█▉        | 961/4850 [04:20<17:29,  3.71it/s]

Epoch: 1, Loss: 5.202919960021973


Processing epoch 00:  20%|█▉        | 962/4850 [04:21<17:31,  3.70it/s]

Epoch: 1, Loss: 4.855506420135498


Processing epoch 00:  20%|█▉        | 963/4850 [04:21<17:27,  3.71it/s]

Epoch: 1, Loss: 4.512472629547119


Processing epoch 00:  20%|█▉        | 964/4850 [04:21<17:25,  3.72it/s]

Epoch: 1, Loss: 5.396466255187988


Processing epoch 00:  20%|█▉        | 965/4850 [04:22<17:27,  3.71it/s]

Epoch: 1, Loss: 5.9743971824646


Processing epoch 00:  20%|█▉        | 966/4850 [04:22<17:26,  3.71it/s]

Epoch: 1, Loss: 5.333519458770752


Processing epoch 00:  20%|█▉        | 967/4850 [04:22<17:27,  3.71it/s]

Epoch: 1, Loss: 5.977479934692383


Processing epoch 00:  20%|█▉        | 968/4850 [04:22<17:23,  3.72it/s]

Epoch: 1, Loss: 5.072853088378906


Processing epoch 00:  20%|█▉        | 969/4850 [04:23<17:21,  3.73it/s]

Epoch: 1, Loss: 5.632920265197754


Processing epoch 00:  20%|██        | 970/4850 [04:23<17:22,  3.72it/s]

Epoch: 1, Loss: 5.875725746154785


Processing epoch 00:  20%|██        | 971/4850 [04:23<17:27,  3.70it/s]

Epoch: 1, Loss: 4.796267986297607


Processing epoch 00:  20%|██        | 972/4850 [04:23<17:24,  3.71it/s]

Epoch: 1, Loss: 6.0409932136535645


Processing epoch 00:  20%|██        | 973/4850 [04:24<17:27,  3.70it/s]

Epoch: 1, Loss: 6.029016494750977


Processing epoch 00:  20%|██        | 974/4850 [04:24<17:33,  3.68it/s]

Epoch: 1, Loss: 4.6170549392700195


Processing epoch 00:  20%|██        | 975/4850 [04:24<17:30,  3.69it/s]

Epoch: 1, Loss: 5.34071159362793


Processing epoch 00:  20%|██        | 976/4850 [04:24<17:27,  3.70it/s]

Epoch: 1, Loss: 5.733172416687012


Processing epoch 00:  20%|██        | 977/4850 [04:25<17:26,  3.70it/s]

Epoch: 1, Loss: 5.908511161804199


Processing epoch 00:  20%|██        | 978/4850 [04:25<17:30,  3.69it/s]

Epoch: 1, Loss: 5.0161237716674805


Processing epoch 00:  20%|██        | 979/4850 [04:25<17:28,  3.69it/s]

Epoch: 1, Loss: 4.653067588806152


Processing epoch 00:  20%|██        | 980/4850 [04:26<17:51,  3.61it/s]

Epoch: 1, Loss: 5.481752872467041


Processing epoch 00:  20%|██        | 981/4850 [04:26<18:13,  3.54it/s]

Epoch: 1, Loss: 5.616827487945557


Processing epoch 00:  20%|██        | 982/4850 [04:26<18:17,  3.53it/s]

Epoch: 1, Loss: 4.57578182220459


Processing epoch 00:  20%|██        | 983/4850 [04:26<18:14,  3.53it/s]

Epoch: 1, Loss: 5.613678932189941


Processing epoch 00:  20%|██        | 984/4850 [04:27<18:15,  3.53it/s]

Epoch: 1, Loss: 4.951780319213867


Processing epoch 00:  20%|██        | 985/4850 [04:27<18:18,  3.52it/s]

Epoch: 1, Loss: 4.804998397827148


Processing epoch 00:  20%|██        | 986/4850 [04:27<18:30,  3.48it/s]

Epoch: 1, Loss: 5.34795618057251


Processing epoch 00:  20%|██        | 987/4850 [04:28<18:28,  3.48it/s]

Epoch: 1, Loss: 5.020047187805176


Processing epoch 00:  20%|██        | 988/4850 [04:28<18:29,  3.48it/s]

Epoch: 1, Loss: 4.750671863555908


Processing epoch 00:  20%|██        | 989/4850 [04:28<18:09,  3.54it/s]

Epoch: 1, Loss: 5.763223171234131


Processing epoch 00:  20%|██        | 990/4850 [04:28<17:53,  3.60it/s]

Epoch: 1, Loss: 5.199583053588867


Processing epoch 00:  20%|██        | 991/4850 [04:29<17:39,  3.64it/s]

Epoch: 1, Loss: 5.44047212600708


Processing epoch 00:  20%|██        | 992/4850 [04:29<17:34,  3.66it/s]

Epoch: 1, Loss: 5.484682559967041


Processing epoch 00:  20%|██        | 993/4850 [04:29<17:26,  3.68it/s]

Epoch: 1, Loss: 5.180762767791748


Processing epoch 00:  20%|██        | 994/4850 [04:29<17:23,  3.70it/s]

Epoch: 1, Loss: 4.737447261810303


Processing epoch 00:  21%|██        | 995/4850 [04:30<17:17,  3.71it/s]

Epoch: 1, Loss: 6.033065319061279


Processing epoch 00:  21%|██        | 996/4850 [04:30<17:19,  3.71it/s]

Epoch: 1, Loss: 5.471174240112305


Processing epoch 00:  21%|██        | 997/4850 [04:30<17:15,  3.72it/s]

Epoch: 1, Loss: 5.725226879119873


Processing epoch 00:  21%|██        | 998/4850 [04:31<17:17,  3.71it/s]

Epoch: 1, Loss: 5.008334159851074


Processing epoch 00:  21%|██        | 999/4850 [04:31<17:19,  3.70it/s]

Epoch: 1, Loss: 4.667387008666992


Processing epoch 00:  21%|██        | 1000/4850 [04:31<17:20,  3.70it/s]

Epoch: 1, Loss: 4.379398822784424


Processing epoch 00:  21%|██        | 1001/4850 [04:31<17:18,  3.71it/s]

Epoch: 1, Loss: 5.861050605773926


Processing epoch 00:  21%|██        | 1002/4850 [04:32<17:17,  3.71it/s]

Epoch: 1, Loss: 5.587102890014648


Processing epoch 00:  21%|██        | 1003/4850 [04:32<17:13,  3.72it/s]

Epoch: 1, Loss: 5.752723693847656


Processing epoch 00:  21%|██        | 1004/4850 [04:32<17:16,  3.71it/s]

Epoch: 1, Loss: 5.990574836730957


Processing epoch 00:  21%|██        | 1005/4850 [04:32<17:20,  3.69it/s]

Epoch: 1, Loss: 6.232649803161621


Processing epoch 00:  21%|██        | 1006/4850 [04:33<17:20,  3.69it/s]

Epoch: 1, Loss: 4.500256538391113


Processing epoch 00:  21%|██        | 1007/4850 [04:33<17:20,  3.69it/s]

Epoch: 1, Loss: 4.624614715576172


Processing epoch 00:  21%|██        | 1008/4850 [04:33<17:23,  3.68it/s]

Epoch: 1, Loss: 5.251918792724609


Processing epoch 00:  21%|██        | 1009/4850 [04:34<17:16,  3.71it/s]

Epoch: 1, Loss: 5.111149787902832


Processing epoch 00:  21%|██        | 1010/4850 [04:34<17:19,  3.69it/s]

Epoch: 1, Loss: 5.185085773468018


Processing epoch 00:  21%|██        | 1011/4850 [04:34<17:20,  3.69it/s]

Epoch: 1, Loss: 4.6637372970581055


Processing epoch 00:  21%|██        | 1012/4850 [04:34<17:17,  3.70it/s]

Epoch: 1, Loss: 5.8001813888549805


Processing epoch 00:  21%|██        | 1013/4850 [04:35<17:18,  3.70it/s]

Epoch: 1, Loss: 5.5432915687561035


Processing epoch 00:  21%|██        | 1014/4850 [04:35<17:14,  3.71it/s]

Epoch: 1, Loss: 5.753465175628662


Processing epoch 00:  21%|██        | 1015/4850 [04:35<17:14,  3.71it/s]

Epoch: 1, Loss: 5.113167762756348


Processing epoch 00:  21%|██        | 1016/4850 [04:35<17:14,  3.71it/s]

Epoch: 1, Loss: 5.612975120544434


Processing epoch 00:  21%|██        | 1017/4850 [04:36<17:09,  3.72it/s]

Epoch: 1, Loss: 4.862496852874756


Processing epoch 00:  21%|██        | 1018/4850 [04:36<17:09,  3.72it/s]

Epoch: 1, Loss: 5.477904319763184


Processing epoch 00:  21%|██        | 1019/4850 [04:36<17:11,  3.71it/s]

Epoch: 1, Loss: 4.943412780761719


Processing epoch 00:  21%|██        | 1020/4850 [04:37<17:11,  3.71it/s]

Epoch: 1, Loss: 6.105929374694824


Processing epoch 00:  21%|██        | 1021/4850 [04:37<17:12,  3.71it/s]

Epoch: 1, Loss: 5.835566520690918


Processing epoch 00:  21%|██        | 1022/4850 [04:37<17:12,  3.71it/s]

Epoch: 1, Loss: 5.347675323486328


Processing epoch 00:  21%|██        | 1023/4850 [04:37<17:17,  3.69it/s]

Epoch: 1, Loss: 5.54931116104126


Processing epoch 00:  21%|██        | 1024/4850 [04:38<17:07,  3.72it/s]

Epoch: 1, Loss: 5.659121036529541


Processing epoch 00:  21%|██        | 1025/4850 [04:38<17:14,  3.70it/s]

Epoch: 1, Loss: 4.124594211578369


Processing epoch 00:  21%|██        | 1026/4850 [04:38<17:39,  3.61it/s]

Epoch: 1, Loss: 5.228713512420654


Processing epoch 00:  21%|██        | 1027/4850 [04:38<17:46,  3.58it/s]

Epoch: 1, Loss: 5.626170635223389


Processing epoch 00:  21%|██        | 1028/4850 [04:39<17:59,  3.54it/s]

Epoch: 1, Loss: 5.015133380889893


Processing epoch 00:  21%|██        | 1029/4850 [04:39<18:10,  3.50it/s]

Epoch: 1, Loss: 5.749961853027344


Processing epoch 00:  21%|██        | 1030/4850 [04:39<18:09,  3.50it/s]

Epoch: 1, Loss: 5.384037494659424


Processing epoch 00:  21%|██▏       | 1031/4850 [04:40<18:10,  3.50it/s]

Epoch: 1, Loss: 5.0044732093811035


Processing epoch 00:  21%|██▏       | 1032/4850 [04:40<18:16,  3.48it/s]

Epoch: 1, Loss: 5.060687065124512


Processing epoch 00:  21%|██▏       | 1033/4850 [04:40<18:23,  3.46it/s]

Epoch: 1, Loss: 5.982840538024902


Processing epoch 00:  21%|██▏       | 1034/4850 [04:40<18:27,  3.45it/s]

Epoch: 1, Loss: 4.616060733795166


Processing epoch 00:  21%|██▏       | 1035/4850 [04:41<18:22,  3.46it/s]

Epoch: 1, Loss: 5.221264839172363


Processing epoch 00:  21%|██▏       | 1036/4850 [04:41<17:58,  3.54it/s]

Epoch: 1, Loss: 5.923123359680176


Processing epoch 00:  21%|██▏       | 1037/4850 [04:41<17:41,  3.59it/s]

Epoch: 1, Loss: 4.691580772399902


Processing epoch 00:  21%|██▏       | 1038/4850 [04:42<17:41,  3.59it/s]

Epoch: 1, Loss: 5.025625705718994


Processing epoch 00:  21%|██▏       | 1039/4850 [04:42<17:24,  3.65it/s]

Epoch: 1, Loss: 5.6157546043396


Processing epoch 00:  21%|██▏       | 1040/4850 [04:42<17:16,  3.68it/s]

Epoch: 1, Loss: 6.47752571105957


Processing epoch 00:  21%|██▏       | 1041/4850 [04:42<17:12,  3.69it/s]

Epoch: 1, Loss: 4.88547945022583


Processing epoch 00:  21%|██▏       | 1042/4850 [04:43<17:09,  3.70it/s]

Epoch: 1, Loss: 5.2959465980529785


Processing epoch 00:  22%|██▏       | 1043/4850 [04:43<17:05,  3.71it/s]

Epoch: 1, Loss: 5.106314182281494


Processing epoch 00:  22%|██▏       | 1044/4850 [04:43<16:59,  3.73it/s]

Epoch: 1, Loss: 6.069488048553467


Processing epoch 00:  22%|██▏       | 1045/4850 [04:43<17:00,  3.73it/s]

Epoch: 1, Loss: 4.710934162139893


Processing epoch 00:  22%|██▏       | 1046/4850 [04:44<17:04,  3.71it/s]

Epoch: 1, Loss: 4.798295497894287


Processing epoch 00:  22%|██▏       | 1047/4850 [04:44<17:03,  3.71it/s]

Epoch: 1, Loss: 4.803727149963379


Processing epoch 00:  22%|██▏       | 1048/4850 [04:44<16:59,  3.73it/s]

Epoch: 1, Loss: 5.9362993240356445


Processing epoch 00:  22%|██▏       | 1049/4850 [04:45<16:57,  3.73it/s]

Epoch: 1, Loss: 5.267254829406738


Processing epoch 00:  22%|██▏       | 1050/4850 [04:45<17:01,  3.72it/s]

Epoch: 1, Loss: 5.194995880126953


Processing epoch 00:  22%|██▏       | 1051/4850 [04:45<17:03,  3.71it/s]

Epoch: 1, Loss: 5.785996437072754


Processing epoch 00:  22%|██▏       | 1052/4850 [04:45<17:03,  3.71it/s]

Epoch: 1, Loss: 5.182569980621338


Processing epoch 00:  22%|██▏       | 1053/4850 [04:46<16:57,  3.73it/s]

Epoch: 1, Loss: 5.654729843139648


Processing epoch 00:  22%|██▏       | 1054/4850 [04:46<16:59,  3.72it/s]

Epoch: 1, Loss: 4.72474479675293


Processing epoch 00:  22%|██▏       | 1055/4850 [04:46<17:00,  3.72it/s]

Epoch: 1, Loss: 5.1521782875061035


Processing epoch 00:  22%|██▏       | 1056/4850 [04:46<16:58,  3.72it/s]

Epoch: 1, Loss: 5.436600685119629


Processing epoch 00:  22%|██▏       | 1057/4850 [04:47<17:09,  3.69it/s]

Epoch: 1, Loss: 4.471408843994141


Processing epoch 00:  22%|██▏       | 1058/4850 [04:47<17:06,  3.69it/s]

Epoch: 1, Loss: 5.334659576416016


Processing epoch 00:  22%|██▏       | 1059/4850 [04:47<17:00,  3.72it/s]

Epoch: 1, Loss: 5.384475231170654


Processing epoch 00:  22%|██▏       | 1060/4850 [04:47<17:01,  3.71it/s]

Epoch: 1, Loss: 5.352199077606201


Processing epoch 00:  22%|██▏       | 1061/4850 [04:48<17:02,  3.70it/s]

Epoch: 1, Loss: 5.67770528793335


Processing epoch 00:  22%|██▏       | 1062/4850 [04:48<16:59,  3.72it/s]

Epoch: 1, Loss: 5.016471862792969


Processing epoch 00:  22%|██▏       | 1063/4850 [04:48<16:56,  3.73it/s]

Epoch: 1, Loss: 5.436048984527588


Processing epoch 00:  22%|██▏       | 1064/4850 [04:49<16:52,  3.74it/s]

Epoch: 1, Loss: 4.7818827629089355


Processing epoch 00:  22%|██▏       | 1065/4850 [04:49<16:55,  3.73it/s]

Epoch: 1, Loss: 5.257867813110352


Processing epoch 00:  22%|██▏       | 1066/4850 [04:49<16:59,  3.71it/s]

Epoch: 1, Loss: 4.757940292358398


Processing epoch 00:  22%|██▏       | 1067/4850 [04:49<16:56,  3.72it/s]

Epoch: 1, Loss: 5.651960849761963


Processing epoch 00:  22%|██▏       | 1068/4850 [04:50<16:53,  3.73it/s]

Epoch: 1, Loss: 5.2717695236206055


Processing epoch 00:  22%|██▏       | 1069/4850 [04:50<16:55,  3.72it/s]

Epoch: 1, Loss: 5.006189346313477


Processing epoch 00:  22%|██▏       | 1070/4850 [04:50<16:59,  3.71it/s]

Epoch: 1, Loss: 5.4349846839904785


Processing epoch 00:  22%|██▏       | 1071/4850 [04:50<16:56,  3.72it/s]

Epoch: 1, Loss: 4.87498664855957


Processing epoch 00:  22%|██▏       | 1072/4850 [04:51<16:51,  3.74it/s]

Epoch: 1, Loss: 4.869593620300293


Processing epoch 00:  22%|██▏       | 1073/4850 [04:51<17:13,  3.66it/s]

Epoch: 1, Loss: 4.867928981781006


Processing epoch 00:  22%|██▏       | 1074/4850 [04:51<17:32,  3.59it/s]

Epoch: 1, Loss: 5.200807571411133


Processing epoch 00:  22%|██▏       | 1075/4850 [04:52<17:46,  3.54it/s]

Epoch: 1, Loss: 4.892552852630615


Processing epoch 00:  22%|██▏       | 1076/4850 [04:52<17:55,  3.51it/s]

Epoch: 1, Loss: 5.030506610870361


Processing epoch 00:  22%|██▏       | 1077/4850 [04:52<17:49,  3.53it/s]

Epoch: 1, Loss: 5.829833030700684


Processing epoch 00:  22%|██▏       | 1078/4850 [04:52<18:03,  3.48it/s]

Epoch: 1, Loss: 4.9361491203308105


Processing epoch 00:  22%|██▏       | 1079/4850 [04:53<18:05,  3.47it/s]

Epoch: 1, Loss: 4.871485710144043


Processing epoch 00:  22%|██▏       | 1080/4850 [04:53<18:07,  3.47it/s]

Epoch: 1, Loss: 4.879778861999512


Processing epoch 00:  22%|██▏       | 1081/4850 [04:53<18:05,  3.47it/s]

Epoch: 1, Loss: 5.615905284881592


Processing epoch 00:  22%|██▏       | 1082/4850 [04:54<17:44,  3.54it/s]

Epoch: 1, Loss: 5.69221830368042


Processing epoch 00:  22%|██▏       | 1083/4850 [04:54<17:34,  3.57it/s]

Epoch: 1, Loss: 5.259801387786865


Processing epoch 00:  22%|██▏       | 1084/4850 [04:54<17:22,  3.61it/s]

Epoch: 1, Loss: 5.297378063201904


Processing epoch 00:  22%|██▏       | 1085/4850 [04:54<17:12,  3.65it/s]

Epoch: 1, Loss: 5.37156867980957


Processing epoch 00:  22%|██▏       | 1086/4850 [04:55<17:05,  3.67it/s]

Epoch: 1, Loss: 5.180966854095459


Processing epoch 00:  22%|██▏       | 1087/4850 [04:55<17:04,  3.67it/s]

Epoch: 1, Loss: 5.352522850036621


Processing epoch 00:  22%|██▏       | 1088/4850 [04:55<17:04,  3.67it/s]

Epoch: 1, Loss: 4.788748741149902


Processing epoch 00:  22%|██▏       | 1089/4850 [04:55<16:57,  3.70it/s]

Epoch: 1, Loss: 4.892112731933594


Processing epoch 00:  22%|██▏       | 1090/4850 [04:56<16:52,  3.71it/s]

Epoch: 1, Loss: 5.271172523498535


Processing epoch 00:  22%|██▏       | 1091/4850 [04:56<16:50,  3.72it/s]

Epoch: 1, Loss: 5.282071113586426


Processing epoch 00:  23%|██▎       | 1092/4850 [04:56<16:54,  3.71it/s]

Epoch: 1, Loss: 5.066606521606445


Processing epoch 00:  23%|██▎       | 1093/4850 [04:57<16:49,  3.72it/s]

Epoch: 1, Loss: 5.557846546173096


Processing epoch 00:  23%|██▎       | 1094/4850 [04:57<16:47,  3.73it/s]

Epoch: 1, Loss: 5.647375583648682


Processing epoch 00:  23%|██▎       | 1095/4850 [04:57<16:46,  3.73it/s]

Epoch: 1, Loss: 5.570016384124756


Processing epoch 00:  23%|██▎       | 1096/4850 [04:57<16:49,  3.72it/s]

Epoch: 1, Loss: 4.879227161407471


Processing epoch 00:  23%|██▎       | 1097/4850 [04:58<16:49,  3.72it/s]

Epoch: 1, Loss: 5.3942461013793945


Processing epoch 00:  23%|██▎       | 1098/4850 [04:58<16:46,  3.73it/s]

Epoch: 1, Loss: 4.612363815307617


Processing epoch 00:  23%|██▎       | 1099/4850 [04:58<16:43,  3.74it/s]

Epoch: 1, Loss: 5.471080780029297


Processing epoch 00:  23%|██▎       | 1100/4850 [04:58<16:48,  3.72it/s]

Epoch: 1, Loss: 5.129333972930908


Processing epoch 00:  23%|██▎       | 1101/4850 [04:59<16:51,  3.71it/s]

Epoch: 1, Loss: 4.94680118560791


Processing epoch 00:  23%|██▎       | 1102/4850 [04:59<16:56,  3.69it/s]

Epoch: 1, Loss: 4.634572982788086


Processing epoch 00:  23%|██▎       | 1103/4850 [04:59<16:56,  3.68it/s]

Epoch: 1, Loss: 4.355123519897461


Processing epoch 00:  23%|██▎       | 1104/4850 [04:59<16:52,  3.70it/s]

Epoch: 1, Loss: 5.589012622833252


Processing epoch 00:  23%|██▎       | 1105/4850 [05:00<16:49,  3.71it/s]

Epoch: 1, Loss: 5.159816741943359


Processing epoch 00:  23%|██▎       | 1106/4850 [05:00<16:51,  3.70it/s]

Epoch: 1, Loss: 5.083742618560791


Processing epoch 00:  23%|██▎       | 1107/4850 [05:00<16:52,  3.70it/s]

Epoch: 1, Loss: 4.667323112487793


Processing epoch 00:  23%|██▎       | 1108/4850 [05:01<16:51,  3.70it/s]

Epoch: 1, Loss: 4.722555160522461


Processing epoch 00:  23%|██▎       | 1109/4850 [05:01<16:53,  3.69it/s]

Epoch: 1, Loss: 5.229526996612549


Processing epoch 00:  23%|██▎       | 1110/4850 [05:01<16:49,  3.71it/s]

Epoch: 1, Loss: 5.610427379608154


Processing epoch 00:  23%|██▎       | 1111/4850 [05:01<16:46,  3.72it/s]

Epoch: 1, Loss: 5.228204727172852


Processing epoch 00:  23%|██▎       | 1112/4850 [05:02<16:46,  3.72it/s]

Epoch: 1, Loss: 5.04313850402832


Processing epoch 00:  23%|██▎       | 1113/4850 [05:02<16:45,  3.72it/s]

Epoch: 1, Loss: 4.9882965087890625


Processing epoch 00:  23%|██▎       | 1114/4850 [05:02<16:46,  3.71it/s]

Epoch: 1, Loss: 4.107895851135254


Processing epoch 00:  23%|██▎       | 1115/4850 [05:02<16:42,  3.73it/s]

Epoch: 1, Loss: 4.566433906555176


Processing epoch 00:  23%|██▎       | 1116/4850 [05:03<16:44,  3.72it/s]

Epoch: 1, Loss: 5.009247303009033


Processing epoch 00:  23%|██▎       | 1117/4850 [05:03<16:41,  3.73it/s]

Epoch: 1, Loss: 5.470648765563965


Processing epoch 00:  23%|██▎       | 1118/4850 [05:03<16:45,  3.71it/s]

Epoch: 1, Loss: 5.818146228790283


Processing epoch 00:  23%|██▎       | 1119/4850 [05:04<17:11,  3.62it/s]

Epoch: 1, Loss: 5.406181335449219


Processing epoch 00:  23%|██▎       | 1120/4850 [05:04<17:31,  3.55it/s]

Epoch: 1, Loss: 5.509539604187012


Processing epoch 00:  23%|██▎       | 1121/4850 [05:04<17:35,  3.53it/s]

Epoch: 1, Loss: 5.054308891296387


Processing epoch 00:  23%|██▎       | 1122/4850 [05:04<17:37,  3.53it/s]

Epoch: 1, Loss: 4.812970161437988


Processing epoch 00:  23%|██▎       | 1123/4850 [05:05<17:36,  3.53it/s]

Epoch: 1, Loss: 4.987954139709473


Processing epoch 00:  23%|██▎       | 1124/4850 [05:05<17:44,  3.50it/s]

Epoch: 1, Loss: 5.529272079467773


Processing epoch 00:  23%|██▎       | 1125/4850 [05:05<17:45,  3.50it/s]

Epoch: 1, Loss: 4.7613139152526855


Processing epoch 00:  23%|██▎       | 1126/4850 [05:06<17:59,  3.45it/s]

Epoch: 1, Loss: 5.057143688201904


Processing epoch 00:  23%|██▎       | 1127/4850 [05:06<17:52,  3.47it/s]

Epoch: 1, Loss: 5.098353862762451


Processing epoch 00:  23%|██▎       | 1128/4850 [05:06<18:06,  3.43it/s]

Epoch: 1, Loss: 5.0570969581604


Processing epoch 00:  23%|██▎       | 1129/4850 [05:06<17:42,  3.50it/s]

Epoch: 1, Loss: 5.286201000213623


Processing epoch 00:  23%|██▎       | 1130/4850 [05:07<17:25,  3.56it/s]

Epoch: 1, Loss: 4.884223461151123


Processing epoch 00:  23%|██▎       | 1131/4850 [05:07<17:13,  3.60it/s]

Epoch: 1, Loss: 4.836254119873047


Processing epoch 00:  23%|██▎       | 1132/4850 [05:07<17:14,  3.59it/s]

Epoch: 1, Loss: 4.115312099456787


Processing epoch 00:  23%|██▎       | 1133/4850 [05:08<17:04,  3.63it/s]

Epoch: 1, Loss: 5.7058916091918945


Processing epoch 00:  23%|██▎       | 1134/4850 [05:08<16:54,  3.66it/s]

Epoch: 1, Loss: 4.967848777770996


Processing epoch 00:  23%|██▎       | 1135/4850 [05:08<16:50,  3.68it/s]

Epoch: 1, Loss: 4.836907386779785


Processing epoch 00:  23%|██▎       | 1136/4850 [05:08<16:45,  3.69it/s]

Epoch: 1, Loss: 5.769374370574951


Processing epoch 00:  23%|██▎       | 1137/4850 [05:09<16:41,  3.71it/s]

Epoch: 1, Loss: 4.604943752288818


Processing epoch 00:  23%|██▎       | 1138/4850 [05:09<16:43,  3.70it/s]

Epoch: 1, Loss: 4.5325727462768555


Processing epoch 00:  23%|██▎       | 1139/4850 [05:09<16:36,  3.72it/s]

Epoch: 1, Loss: 5.750697135925293


Processing epoch 00:  24%|██▎       | 1140/4850 [05:09<16:36,  3.72it/s]

Epoch: 1, Loss: 4.862358093261719


Processing epoch 00:  24%|██▎       | 1141/4850 [05:10<16:37,  3.72it/s]

Epoch: 1, Loss: 4.381825923919678


Processing epoch 00:  24%|██▎       | 1142/4850 [05:10<16:34,  3.73it/s]

Epoch: 1, Loss: 5.683604717254639


Processing epoch 00:  24%|██▎       | 1143/4850 [05:10<16:31,  3.74it/s]

Epoch: 1, Loss: 5.563442230224609


Processing epoch 00:  24%|██▎       | 1144/4850 [05:10<16:35,  3.72it/s]

Epoch: 1, Loss: 5.073901653289795


Processing epoch 00:  24%|██▎       | 1145/4850 [05:11<16:38,  3.71it/s]

Epoch: 1, Loss: 4.732990741729736


Processing epoch 00:  24%|██▎       | 1146/4850 [05:11<16:35,  3.72it/s]

Epoch: 1, Loss: 5.7887043952941895


Processing epoch 00:  24%|██▎       | 1147/4850 [05:11<16:37,  3.71it/s]

Epoch: 1, Loss: 5.170505046844482


Processing epoch 00:  24%|██▎       | 1148/4850 [05:12<16:36,  3.72it/s]

Epoch: 1, Loss: 4.78499698638916


Processing epoch 00:  24%|██▎       | 1149/4850 [05:12<16:36,  3.71it/s]

Epoch: 1, Loss: 4.491938591003418


Processing epoch 00:  24%|██▎       | 1150/4850 [05:12<16:35,  3.72it/s]

Epoch: 1, Loss: 5.630401134490967


Processing epoch 00:  24%|██▎       | 1151/4850 [05:12<16:41,  3.69it/s]

Epoch: 1, Loss: 4.834245681762695


Processing epoch 00:  24%|██▍       | 1152/4850 [05:13<16:36,  3.71it/s]

Epoch: 1, Loss: 5.368948936462402


Processing epoch 00:  24%|██▍       | 1153/4850 [05:13<16:33,  3.72it/s]

Epoch: 1, Loss: 4.9116597175598145


Processing epoch 00:  24%|██▍       | 1154/4850 [05:13<16:32,  3.72it/s]

Epoch: 1, Loss: 5.082409381866455


Processing epoch 00:  24%|██▍       | 1155/4850 [05:13<16:29,  3.73it/s]

Epoch: 1, Loss: 5.644371032714844


Processing epoch 00:  24%|██▍       | 1156/4850 [05:14<16:31,  3.73it/s]

Epoch: 1, Loss: 4.837118625640869


Processing epoch 00:  24%|██▍       | 1157/4850 [05:14<16:30,  3.73it/s]

Epoch: 1, Loss: 4.75531005859375


Processing epoch 00:  24%|██▍       | 1158/4850 [05:14<16:31,  3.72it/s]

Epoch: 1, Loss: 3.512364387512207


Processing epoch 00:  24%|██▍       | 1159/4850 [05:15<16:31,  3.72it/s]

Epoch: 1, Loss: 4.286957740783691


Processing epoch 00:  24%|██▍       | 1160/4850 [05:15<16:35,  3.71it/s]

Epoch: 1, Loss: 4.393463611602783


Processing epoch 00:  24%|██▍       | 1161/4850 [05:15<16:35,  3.70it/s]

Epoch: 1, Loss: 5.038088798522949


Processing epoch 00:  24%|██▍       | 1162/4850 [05:15<16:32,  3.72it/s]

Epoch: 1, Loss: 4.906519412994385


Processing epoch 00:  24%|██▍       | 1163/4850 [05:16<16:30,  3.72it/s]

Epoch: 1, Loss: 5.323510646820068


Processing epoch 00:  24%|██▍       | 1164/4850 [05:16<16:33,  3.71it/s]

Epoch: 1, Loss: 4.939816474914551


Processing epoch 00:  24%|██▍       | 1165/4850 [05:16<16:29,  3.72it/s]

Epoch: 1, Loss: 5.569894313812256


Processing epoch 00:  24%|██▍       | 1166/4850 [05:16<16:55,  3.63it/s]

Epoch: 1, Loss: 5.178869724273682


Processing epoch 00:  24%|██▍       | 1167/4850 [05:17<17:08,  3.58it/s]

Epoch: 1, Loss: 4.966924667358398


Processing epoch 00:  24%|██▍       | 1168/4850 [05:17<17:23,  3.53it/s]

Epoch: 1, Loss: 4.747838020324707


Processing epoch 00:  24%|██▍       | 1169/4850 [05:17<17:35,  3.49it/s]

Epoch: 1, Loss: 4.459944248199463


Processing epoch 00:  24%|██▍       | 1170/4850 [05:18<17:38,  3.48it/s]

Epoch: 1, Loss: 5.951569080352783


Processing epoch 00:  24%|██▍       | 1171/4850 [05:18<17:39,  3.47it/s]

Epoch: 1, Loss: 4.486408710479736


Processing epoch 00:  24%|██▍       | 1172/4850 [05:18<17:38,  3.47it/s]

Epoch: 1, Loss: 5.750343322753906


Processing epoch 00:  24%|██▍       | 1173/4850 [05:18<17:40,  3.47it/s]

Epoch: 1, Loss: 4.376016139984131


Processing epoch 00:  24%|██▍       | 1174/4850 [05:19<17:48,  3.44it/s]

Epoch: 1, Loss: 5.0156965255737305


Processing epoch 00:  24%|██▍       | 1175/4850 [05:19<17:37,  3.48it/s]

Epoch: 1, Loss: 5.845939636230469


Processing epoch 00:  24%|██▍       | 1176/4850 [05:19<17:18,  3.54it/s]

Epoch: 1, Loss: 5.257306098937988


Processing epoch 00:  24%|██▍       | 1177/4850 [05:20<17:11,  3.56it/s]

Epoch: 1, Loss: 4.753208160400391


Processing epoch 00:  24%|██▍       | 1178/4850 [05:20<17:07,  3.57it/s]

Epoch: 1, Loss: 4.785134792327881


Processing epoch 00:  24%|██▍       | 1179/4850 [05:20<16:54,  3.62it/s]

Epoch: 1, Loss: 5.34578275680542


Processing epoch 00:  24%|██▍       | 1180/4850 [05:20<16:46,  3.65it/s]

Epoch: 1, Loss: 5.076059341430664


Processing epoch 00:  24%|██▍       | 1181/4850 [05:21<16:42,  3.66it/s]

Epoch: 1, Loss: 5.168977737426758


Processing epoch 00:  24%|██▍       | 1182/4850 [05:21<16:36,  3.68it/s]

Epoch: 1, Loss: 4.9839067459106445


Processing epoch 00:  24%|██▍       | 1183/4850 [05:21<16:39,  3.67it/s]

Epoch: 1, Loss: 5.493076324462891


Processing epoch 00:  24%|██▍       | 1184/4850 [05:21<16:34,  3.69it/s]

Epoch: 1, Loss: 5.009238243103027


Processing epoch 00:  24%|██▍       | 1185/4850 [05:22<16:35,  3.68it/s]

Epoch: 1, Loss: 4.835666656494141


Processing epoch 00:  24%|██▍       | 1186/4850 [05:22<16:33,  3.69it/s]

Epoch: 1, Loss: 4.332055568695068


Processing epoch 00:  24%|██▍       | 1187/4850 [05:22<16:30,  3.70it/s]

Epoch: 1, Loss: 5.518142223358154


Processing epoch 00:  24%|██▍       | 1188/4850 [05:23<16:28,  3.70it/s]

Epoch: 1, Loss: 4.6456522941589355


Processing epoch 00:  25%|██▍       | 1189/4850 [05:23<16:27,  3.71it/s]

Epoch: 1, Loss: 4.917128086090088


Processing epoch 00:  25%|██▍       | 1190/4850 [05:23<16:23,  3.72it/s]

Epoch: 1, Loss: 5.704868793487549


Processing epoch 00:  25%|██▍       | 1191/4850 [05:23<16:24,  3.72it/s]

Epoch: 1, Loss: 4.467406272888184


Processing epoch 00:  25%|██▍       | 1192/4850 [05:24<16:25,  3.71it/s]

Epoch: 1, Loss: 4.660555839538574


Processing epoch 00:  25%|██▍       | 1193/4850 [05:24<16:29,  3.70it/s]

Epoch: 1, Loss: 4.5214009284973145


Processing epoch 00:  25%|██▍       | 1194/4850 [05:24<16:25,  3.71it/s]

Epoch: 1, Loss: 4.923774719238281


Processing epoch 00:  25%|██▍       | 1195/4850 [05:24<16:22,  3.72it/s]

Epoch: 1, Loss: 4.8613810539245605


Processing epoch 00:  25%|██▍       | 1196/4850 [05:25<16:26,  3.70it/s]

Epoch: 1, Loss: 5.3874945640563965


Processing epoch 00:  25%|██▍       | 1197/4850 [05:25<16:25,  3.71it/s]

Epoch: 1, Loss: 5.741524696350098


Processing epoch 00:  25%|██▍       | 1198/4850 [05:25<16:23,  3.71it/s]

Epoch: 1, Loss: 4.85660457611084


Processing epoch 00:  25%|██▍       | 1199/4850 [05:26<16:23,  3.71it/s]

Epoch: 1, Loss: 5.064236640930176


Processing epoch 00:  25%|██▍       | 1200/4850 [05:26<16:24,  3.71it/s]

Epoch: 1, Loss: 4.639128684997559


Processing epoch 00:  25%|██▍       | 1201/4850 [05:26<16:25,  3.70it/s]

Epoch: 1, Loss: 4.732349395751953


Processing epoch 00:  25%|██▍       | 1202/4850 [05:26<16:24,  3.70it/s]

Epoch: 1, Loss: 4.00635290145874


Processing epoch 00:  25%|██▍       | 1203/4850 [05:27<16:19,  3.72it/s]

Epoch: 1, Loss: 5.031009674072266


Processing epoch 00:  25%|██▍       | 1204/4850 [05:27<16:17,  3.73it/s]

Epoch: 1, Loss: 4.61609411239624


Processing epoch 00:  25%|██▍       | 1205/4850 [05:27<16:19,  3.72it/s]

Epoch: 1, Loss: 4.076254367828369


Processing epoch 00:  25%|██▍       | 1206/4850 [05:27<16:22,  3.71it/s]

Epoch: 1, Loss: 5.178892135620117


Processing epoch 00:  25%|██▍       | 1207/4850 [05:28<16:19,  3.72it/s]

Epoch: 1, Loss: 4.50116491317749


Processing epoch 00:  25%|██▍       | 1208/4850 [05:28<16:15,  3.73it/s]

Epoch: 1, Loss: 4.845127105712891


Processing epoch 00:  25%|██▍       | 1209/4850 [05:28<16:14,  3.74it/s]

Epoch: 1, Loss: 4.998014450073242


Processing epoch 00:  25%|██▍       | 1210/4850 [05:28<16:16,  3.73it/s]

Epoch: 1, Loss: 4.840760707855225


Processing epoch 00:  25%|██▍       | 1211/4850 [05:29<16:21,  3.71it/s]

Epoch: 1, Loss: 4.6326117515563965


Processing epoch 00:  25%|██▍       | 1212/4850 [05:29<16:25,  3.69it/s]

Epoch: 1, Loss: 4.509288787841797


Processing epoch 00:  25%|██▌       | 1213/4850 [05:29<16:47,  3.61it/s]

Epoch: 1, Loss: 5.260913372039795


Processing epoch 00:  25%|██▌       | 1214/4850 [05:30<17:08,  3.53it/s]

Epoch: 1, Loss: 4.222746849060059


Processing epoch 00:  25%|██▌       | 1215/4850 [05:30<17:13,  3.52it/s]

Epoch: 1, Loss: 4.526745796203613


Processing epoch 00:  25%|██▌       | 1216/4850 [05:30<17:10,  3.53it/s]

Epoch: 1, Loss: 4.7131853103637695


Processing epoch 00:  25%|██▌       | 1217/4850 [05:30<17:15,  3.51it/s]

Epoch: 1, Loss: 4.8667497634887695


Processing epoch 00:  25%|██▌       | 1218/4850 [05:31<17:17,  3.50it/s]

Epoch: 1, Loss: 5.40109920501709


Processing epoch 00:  25%|██▌       | 1219/4850 [05:31<17:23,  3.48it/s]

Epoch: 1, Loss: 4.4570183753967285


Processing epoch 00:  25%|██▌       | 1220/4850 [05:31<17:23,  3.48it/s]

Epoch: 1, Loss: 4.5809760093688965


Processing epoch 00:  25%|██▌       | 1221/4850 [05:32<17:25,  3.47it/s]

Epoch: 1, Loss: 5.1315436363220215


Processing epoch 00:  25%|██▌       | 1222/4850 [05:32<17:05,  3.54it/s]

Epoch: 1, Loss: 4.325931072235107


Processing epoch 00:  25%|██▌       | 1223/4850 [05:32<16:50,  3.59it/s]

Epoch: 1, Loss: 4.363344669342041


Processing epoch 00:  25%|██▌       | 1224/4850 [05:32<16:39,  3.63it/s]

Epoch: 1, Loss: 3.7083687782287598


Processing epoch 00:  25%|██▌       | 1225/4850 [05:33<16:38,  3.63it/s]

Epoch: 1, Loss: 5.674586296081543


Processing epoch 00:  25%|██▌       | 1226/4850 [05:33<16:44,  3.61it/s]

Epoch: 1, Loss: 4.688277721405029


Processing epoch 00:  25%|██▌       | 1227/4850 [05:33<16:39,  3.62it/s]

Epoch: 1, Loss: 4.602715969085693


Processing epoch 00:  25%|██▌       | 1228/4850 [05:34<16:32,  3.65it/s]

Epoch: 1, Loss: 5.407127380371094


Processing epoch 00:  25%|██▌       | 1229/4850 [05:34<16:27,  3.67it/s]

Epoch: 1, Loss: 5.113649368286133


Processing epoch 00:  25%|██▌       | 1230/4850 [05:34<16:24,  3.68it/s]

Epoch: 1, Loss: 5.076633930206299


Processing epoch 00:  25%|██▌       | 1231/4850 [05:34<16:16,  3.70it/s]

Epoch: 1, Loss: 5.469040393829346


Processing epoch 00:  25%|██▌       | 1232/4850 [05:35<16:14,  3.71it/s]

Epoch: 1, Loss: 5.3340959548950195


Processing epoch 00:  25%|██▌       | 1233/4850 [05:35<16:15,  3.71it/s]

Epoch: 1, Loss: 5.332519054412842


Processing epoch 00:  25%|██▌       | 1234/4850 [05:35<16:15,  3.71it/s]

Epoch: 1, Loss: 4.8434906005859375


Processing epoch 00:  25%|██▌       | 1235/4850 [05:35<16:15,  3.70it/s]

Epoch: 1, Loss: 4.981510162353516


Processing epoch 00:  25%|██▌       | 1236/4850 [05:36<16:17,  3.70it/s]

Epoch: 1, Loss: 4.521974563598633


Processing epoch 00:  26%|██▌       | 1237/4850 [05:36<16:15,  3.70it/s]

Epoch: 1, Loss: 4.321741104125977


Processing epoch 00:  26%|██▌       | 1238/4850 [05:36<16:07,  3.73it/s]

Epoch: 1, Loss: 5.734453201293945


Processing epoch 00:  26%|██▌       | 1239/4850 [05:36<16:07,  3.73it/s]

Epoch: 1, Loss: 5.7616167068481445


Processing epoch 00:  26%|██▌       | 1240/4850 [05:37<16:07,  3.73it/s]

Epoch: 1, Loss: 5.19500732421875


Processing epoch 00:  26%|██▌       | 1241/4850 [05:37<16:08,  3.73it/s]

Epoch: 1, Loss: 4.542142391204834


Processing epoch 00:  26%|██▌       | 1242/4850 [05:37<16:07,  3.73it/s]

Epoch: 1, Loss: 3.911959648132324


Processing epoch 00:  26%|██▌       | 1243/4850 [05:38<16:05,  3.73it/s]

Epoch: 1, Loss: 4.291692733764648


Processing epoch 00:  26%|██▌       | 1244/4850 [05:38<16:07,  3.73it/s]

Epoch: 1, Loss: 5.472470760345459


Processing epoch 00:  26%|██▌       | 1245/4850 [05:38<16:15,  3.69it/s]

Epoch: 1, Loss: 5.1237993240356445


Processing epoch 00:  26%|██▌       | 1246/4850 [05:38<16:08,  3.72it/s]

Epoch: 1, Loss: 5.516120433807373


Processing epoch 00:  26%|██▌       | 1247/4850 [05:39<16:09,  3.72it/s]

Epoch: 1, Loss: 4.601867198944092


Processing epoch 00:  26%|██▌       | 1248/4850 [05:39<16:10,  3.71it/s]

Epoch: 1, Loss: 5.480290412902832


Processing epoch 00:  26%|██▌       | 1249/4850 [05:39<16:08,  3.72it/s]

Epoch: 1, Loss: 5.234763145446777


Processing epoch 00:  26%|██▌       | 1250/4850 [05:39<16:07,  3.72it/s]

Epoch: 1, Loss: 5.239284038543701


Processing epoch 00:  26%|██▌       | 1251/4850 [05:40<16:05,  3.73it/s]

Epoch: 1, Loss: 4.934116840362549


Processing epoch 00:  26%|██▌       | 1252/4850 [05:40<16:02,  3.74it/s]

Epoch: 1, Loss: 5.5774993896484375


Processing epoch 00:  26%|██▌       | 1253/4850 [05:40<16:02,  3.74it/s]

Epoch: 1, Loss: 4.6756510734558105


Processing epoch 00:  26%|██▌       | 1254/4850 [05:41<16:07,  3.72it/s]

Epoch: 1, Loss: 3.8561971187591553


Processing epoch 00:  26%|██▌       | 1255/4850 [05:41<16:08,  3.71it/s]

Epoch: 1, Loss: 5.1681671142578125


Processing epoch 00:  26%|██▌       | 1256/4850 [05:41<16:10,  3.71it/s]

Epoch: 1, Loss: 4.4129462242126465


Processing epoch 00:  26%|██▌       | 1257/4850 [05:41<16:05,  3.72it/s]

Epoch: 1, Loss: 5.939565658569336


Processing epoch 00:  26%|██▌       | 1258/4850 [05:42<16:10,  3.70it/s]

Epoch: 1, Loss: 5.188794136047363


Processing epoch 00:  26%|██▌       | 1259/4850 [05:42<16:24,  3.65it/s]

Epoch: 1, Loss: 5.199147701263428


Processing epoch 00:  26%|██▌       | 1260/4850 [05:42<16:35,  3.60it/s]

Epoch: 1, Loss: 5.4235734939575195


Processing epoch 00:  26%|██▌       | 1261/4850 [05:42<16:39,  3.59it/s]

Epoch: 1, Loss: 5.412288665771484


Processing epoch 00:  26%|██▌       | 1262/4850 [05:43<16:41,  3.58it/s]

Epoch: 1, Loss: 5.692224979400635


Processing epoch 00:  26%|██▌       | 1263/4850 [05:43<16:44,  3.57it/s]

Epoch: 1, Loss: 4.850304126739502


Processing epoch 00:  26%|██▌       | 1264/4850 [05:43<16:51,  3.54it/s]

Epoch: 1, Loss: 5.304890155792236


Processing epoch 00:  26%|██▌       | 1265/4850 [05:44<16:50,  3.55it/s]

Epoch: 1, Loss: 5.715677261352539


Processing epoch 00:  26%|██▌       | 1266/4850 [05:44<16:57,  3.52it/s]

Epoch: 1, Loss: 5.928540229797363


Processing epoch 00:  26%|██▌       | 1267/4850 [05:44<16:57,  3.52it/s]

Epoch: 1, Loss: 4.5317230224609375


Processing epoch 00:  26%|██▌       | 1268/4850 [05:44<16:57,  3.52it/s]

Epoch: 1, Loss: 4.906121253967285


Processing epoch 00:  26%|██▌       | 1269/4850 [05:45<16:44,  3.56it/s]

Epoch: 1, Loss: 5.205624580383301


Processing epoch 00:  26%|██▌       | 1270/4850 [05:45<16:32,  3.61it/s]

Epoch: 1, Loss: 5.949728012084961


Processing epoch 00:  26%|██▌       | 1271/4850 [05:45<16:25,  3.63it/s]

Epoch: 1, Loss: 5.111818790435791


Processing epoch 00:  26%|██▌       | 1272/4850 [05:46<16:22,  3.64it/s]

Epoch: 1, Loss: 5.201779365539551


Processing epoch 00:  26%|██▌       | 1273/4850 [05:46<16:17,  3.66it/s]

Epoch: 1, Loss: 4.254683017730713


Processing epoch 00:  26%|██▋       | 1274/4850 [05:46<16:17,  3.66it/s]

Epoch: 1, Loss: 4.783908843994141


Processing epoch 00:  26%|██▋       | 1275/4850 [05:46<16:20,  3.65it/s]

Epoch: 1, Loss: 5.449209213256836


Processing epoch 00:  26%|██▋       | 1276/4850 [05:47<16:16,  3.66it/s]

Epoch: 1, Loss: 4.462790489196777


Processing epoch 00:  26%|██▋       | 1277/4850 [05:47<16:11,  3.68it/s]

Epoch: 1, Loss: 4.751382350921631


Processing epoch 00:  26%|██▋       | 1278/4850 [05:47<16:06,  3.69it/s]

Epoch: 1, Loss: 5.783545970916748


Processing epoch 00:  26%|██▋       | 1279/4850 [05:47<16:07,  3.69it/s]

Epoch: 1, Loss: 4.6120476722717285


Processing epoch 00:  26%|██▋       | 1280/4850 [05:48<16:09,  3.68it/s]

Epoch: 1, Loss: 4.717246055603027


Processing epoch 00:  26%|██▋       | 1281/4850 [05:48<16:05,  3.70it/s]

Epoch: 1, Loss: 5.678263187408447


Processing epoch 00:  26%|██▋       | 1282/4850 [05:48<16:02,  3.71it/s]

Epoch: 1, Loss: 5.149430274963379


Processing epoch 00:  26%|██▋       | 1283/4850 [05:48<15:58,  3.72it/s]

Epoch: 1, Loss: 5.214057445526123


Processing epoch 00:  26%|██▋       | 1284/4850 [05:49<15:59,  3.72it/s]

Epoch: 1, Loss: 4.945556163787842


Processing epoch 00:  26%|██▋       | 1285/4850 [05:49<16:04,  3.70it/s]

Epoch: 1, Loss: 5.067357540130615


Processing epoch 00:  27%|██▋       | 1286/4850 [05:49<15:59,  3.72it/s]

Epoch: 1, Loss: 4.415369987487793


Processing epoch 00:  27%|██▋       | 1287/4850 [05:50<16:00,  3.71it/s]

Epoch: 1, Loss: 4.187952995300293


Processing epoch 00:  27%|██▋       | 1288/4850 [05:50<16:01,  3.71it/s]

Epoch: 1, Loss: 5.370718479156494


Processing epoch 00:  27%|██▋       | 1289/4850 [05:50<15:58,  3.72it/s]

Epoch: 1, Loss: 4.778423309326172


Processing epoch 00:  27%|██▋       | 1290/4850 [05:50<15:59,  3.71it/s]

Epoch: 1, Loss: 4.834774971008301


Processing epoch 00:  27%|██▋       | 1291/4850 [05:51<15:55,  3.73it/s]

Epoch: 1, Loss: 6.138401508331299


Processing epoch 00:  27%|██▋       | 1292/4850 [05:51<15:57,  3.72it/s]

Epoch: 1, Loss: 3.933682918548584


Processing epoch 00:  27%|██▋       | 1293/4850 [05:51<15:56,  3.72it/s]

Epoch: 1, Loss: 4.827193260192871


Processing epoch 00:  27%|██▋       | 1294/4850 [05:51<16:06,  3.68it/s]

Epoch: 1, Loss: 4.235632419586182


Processing epoch 00:  27%|██▋       | 1295/4850 [05:52<16:07,  3.67it/s]

Epoch: 1, Loss: 5.629414081573486


Processing epoch 00:  27%|██▋       | 1296/4850 [05:52<16:04,  3.69it/s]

Epoch: 1, Loss: 4.592004299163818


Processing epoch 00:  27%|██▋       | 1297/4850 [05:52<15:57,  3.71it/s]

Epoch: 1, Loss: 5.561418533325195


Processing epoch 00:  27%|██▋       | 1298/4850 [05:53<15:57,  3.71it/s]

Epoch: 1, Loss: 4.1879167556762695


Processing epoch 00:  27%|██▋       | 1299/4850 [05:53<15:58,  3.70it/s]

Epoch: 1, Loss: 4.266729831695557


Processing epoch 00:  27%|██▋       | 1300/4850 [05:53<15:55,  3.72it/s]

Epoch: 1, Loss: 4.931017875671387


Processing epoch 00:  27%|██▋       | 1301/4850 [05:53<15:51,  3.73it/s]

Epoch: 1, Loss: 4.5586466789245605


Processing epoch 00:  27%|██▋       | 1302/4850 [05:54<15:53,  3.72it/s]

Epoch: 1, Loss: 4.767817497253418


Processing epoch 00:  27%|██▋       | 1303/4850 [05:54<15:53,  3.72it/s]

Epoch: 1, Loss: 4.380020618438721


Processing epoch 00:  27%|██▋       | 1304/4850 [05:54<15:52,  3.72it/s]

Epoch: 1, Loss: 5.594761848449707


Processing epoch 00:  27%|██▋       | 1305/4850 [05:54<15:50,  3.73it/s]

Epoch: 1, Loss: 5.947366714477539


Processing epoch 00:  27%|██▋       | 1306/4850 [05:55<16:19,  3.62it/s]

Epoch: 1, Loss: 4.131960391998291


Processing epoch 00:  27%|██▋       | 1307/4850 [05:55<16:24,  3.60it/s]

Epoch: 1, Loss: 4.502471446990967


Processing epoch 00:  27%|██▋       | 1308/4850 [05:55<16:26,  3.59it/s]

Epoch: 1, Loss: 4.816304683685303


Processing epoch 00:  27%|██▋       | 1309/4850 [05:56<16:44,  3.52it/s]

Epoch: 1, Loss: 4.301058292388916


Processing epoch 00:  27%|██▋       | 1310/4850 [05:56<16:58,  3.48it/s]

Epoch: 1, Loss: 4.095466136932373


Processing epoch 00:  27%|██▋       | 1311/4850 [05:56<17:15,  3.42it/s]

Epoch: 1, Loss: 5.273111343383789


Processing epoch 00:  27%|██▋       | 1312/4850 [05:56<17:12,  3.42it/s]

Epoch: 1, Loss: 4.507039546966553


Processing epoch 00:  27%|██▋       | 1313/4850 [05:57<17:05,  3.45it/s]

Epoch: 1, Loss: 4.768043518066406


Processing epoch 00:  27%|██▋       | 1314/4850 [05:57<16:56,  3.48it/s]

Epoch: 1, Loss: 4.726591110229492


Processing epoch 00:  27%|██▋       | 1315/4850 [05:57<17:04,  3.45it/s]

Epoch: 1, Loss: 5.268342971801758


Processing epoch 00:  27%|██▋       | 1316/4850 [05:58<16:42,  3.53it/s]

Epoch: 1, Loss: 5.72913122177124


Processing epoch 00:  27%|██▋       | 1317/4850 [05:58<16:29,  3.57it/s]

Epoch: 1, Loss: 4.932436943054199


Processing epoch 00:  27%|██▋       | 1318/4850 [05:58<16:16,  3.62it/s]

Epoch: 1, Loss: 3.7646484375


Processing epoch 00:  27%|██▋       | 1319/4850 [05:58<16:14,  3.62it/s]

Epoch: 1, Loss: 4.756773948669434


Processing epoch 00:  27%|██▋       | 1320/4850 [05:59<16:10,  3.64it/s]

Epoch: 1, Loss: 5.108061790466309


Processing epoch 00:  27%|██▋       | 1321/4850 [05:59<16:04,  3.66it/s]

Epoch: 1, Loss: 4.480131149291992


Processing epoch 00:  27%|██▋       | 1322/4850 [05:59<16:01,  3.67it/s]

Epoch: 1, Loss: 4.5664448738098145


Processing epoch 00:  27%|██▋       | 1323/4850 [05:59<15:59,  3.68it/s]

Epoch: 1, Loss: 4.294024467468262


Processing epoch 00:  27%|██▋       | 1324/4850 [06:00<16:02,  3.66it/s]

Epoch: 1, Loss: 3.3057398796081543


Processing epoch 00:  27%|██▋       | 1325/4850 [06:00<16:03,  3.66it/s]

Epoch: 1, Loss: 5.290709018707275


Processing epoch 00:  27%|██▋       | 1326/4850 [06:00<16:03,  3.66it/s]

Epoch: 1, Loss: 4.547898292541504


Processing epoch 00:  27%|██▋       | 1327/4850 [06:01<15:56,  3.68it/s]

Epoch: 1, Loss: 5.26973295211792


Processing epoch 00:  27%|██▋       | 1328/4850 [06:01<15:53,  3.70it/s]

Epoch: 1, Loss: 4.458289623260498


Processing epoch 00:  27%|██▋       | 1329/4850 [06:01<15:54,  3.69it/s]

Epoch: 1, Loss: 4.716672897338867


Processing epoch 00:  27%|██▋       | 1330/4850 [06:01<15:53,  3.69it/s]

Epoch: 1, Loss: 4.962994575500488


Processing epoch 00:  27%|██▋       | 1331/4850 [06:02<15:56,  3.68it/s]

Epoch: 1, Loss: 4.863129138946533


Processing epoch 00:  27%|██▋       | 1332/4850 [06:02<15:54,  3.69it/s]

Epoch: 1, Loss: 4.235525131225586


Processing epoch 00:  27%|██▋       | 1333/4850 [06:02<15:50,  3.70it/s]

Epoch: 1, Loss: 4.924196720123291


Processing epoch 00:  28%|██▊       | 1334/4850 [06:02<15:52,  3.69it/s]

Epoch: 1, Loss: 4.7822957038879395


Processing epoch 00:  28%|██▊       | 1335/4850 [06:03<16:00,  3.66it/s]

Epoch: 1, Loss: 3.7799201011657715


Processing epoch 00:  28%|██▊       | 1336/4850 [06:03<15:54,  3.68it/s]

Epoch: 1, Loss: 5.189599514007568


Processing epoch 00:  28%|██▊       | 1337/4850 [06:03<15:54,  3.68it/s]

Epoch: 1, Loss: 4.291143417358398


Processing epoch 00:  28%|██▊       | 1338/4850 [06:04<15:54,  3.68it/s]

Epoch: 1, Loss: 3.995319366455078


Processing epoch 00:  28%|██▊       | 1339/4850 [06:04<15:52,  3.69it/s]

Epoch: 1, Loss: 5.124677658081055


Processing epoch 00:  28%|██▊       | 1340/4850 [06:04<15:49,  3.70it/s]

Epoch: 1, Loss: 4.853420734405518


Processing epoch 00:  28%|██▊       | 1341/4850 [06:04<15:46,  3.71it/s]

Epoch: 1, Loss: 4.93562650680542


Processing epoch 00:  28%|██▊       | 1342/4850 [06:05<15:51,  3.69it/s]

Epoch: 1, Loss: 4.163216590881348


Processing epoch 00:  28%|██▊       | 1343/4850 [06:05<15:48,  3.70it/s]

Epoch: 1, Loss: 3.8170690536499023


Processing epoch 00:  28%|██▊       | 1344/4850 [06:05<15:46,  3.71it/s]

Epoch: 1, Loss: 4.802901268005371


Processing epoch 00:  28%|██▊       | 1345/4850 [06:05<15:45,  3.71it/s]

Epoch: 1, Loss: 4.927510738372803


Processing epoch 00:  28%|██▊       | 1346/4850 [06:06<15:44,  3.71it/s]

Epoch: 1, Loss: 4.276852607727051


Processing epoch 00:  28%|██▊       | 1347/4850 [06:06<15:44,  3.71it/s]

Epoch: 1, Loss: 4.389608860015869


Processing epoch 00:  28%|██▊       | 1348/4850 [06:06<15:39,  3.73it/s]

Epoch: 1, Loss: 4.547338485717773


Processing epoch 00:  28%|██▊       | 1349/4850 [06:07<15:36,  3.74it/s]

Epoch: 1, Loss: 4.850829601287842


Processing epoch 00:  28%|██▊       | 1350/4850 [06:07<15:31,  3.76it/s]

Epoch: 1, Loss: 5.5136919021606445


Processing epoch 00:  28%|██▊       | 1351/4850 [06:07<15:40,  3.72it/s]

Epoch: 1, Loss: 4.001285552978516


Processing epoch 00:  28%|██▊       | 1352/4850 [06:07<15:51,  3.68it/s]

Epoch: 1, Loss: 3.9108400344848633


Processing epoch 00:  28%|██▊       | 1353/4850 [06:08<16:09,  3.61it/s]

Epoch: 1, Loss: 4.140377998352051


Processing epoch 00:  28%|██▊       | 1354/4850 [06:08<16:33,  3.52it/s]

Epoch: 1, Loss: 4.5430731773376465


Processing epoch 00:  28%|██▊       | 1355/4850 [06:08<16:24,  3.55it/s]

Epoch: 1, Loss: 6.037444114685059


Processing epoch 00:  28%|██▊       | 1356/4850 [06:08<16:23,  3.55it/s]

Epoch: 1, Loss: 4.740703582763672


Processing epoch 00:  28%|██▊       | 1357/4850 [06:09<16:20,  3.56it/s]

Epoch: 1, Loss: 5.036312103271484


Processing epoch 00:  28%|██▊       | 1358/4850 [06:09<16:26,  3.54it/s]

Epoch: 1, Loss: 4.9489521980285645


Processing epoch 00:  28%|██▊       | 1359/4850 [06:09<16:48,  3.46it/s]

Epoch: 1, Loss: 4.198342800140381


Processing epoch 00:  28%|██▊       | 1360/4850 [06:10<16:44,  3.47it/s]

Epoch: 1, Loss: 5.523867607116699


Processing epoch 00:  28%|██▊       | 1361/4850 [06:10<16:43,  3.48it/s]

Epoch: 1, Loss: 5.519796848297119


Processing epoch 00:  28%|██▊       | 1362/4850 [06:10<16:22,  3.55it/s]

Epoch: 1, Loss: 5.252383708953857


Processing epoch 00:  28%|██▊       | 1363/4850 [06:10<16:10,  3.59it/s]

Epoch: 1, Loss: 4.574680328369141


Processing epoch 00:  28%|██▊       | 1364/4850 [06:11<15:58,  3.64it/s]

Epoch: 1, Loss: 4.760563850402832


Processing epoch 00:  28%|██▊       | 1365/4850 [06:11<15:51,  3.66it/s]

Epoch: 1, Loss: 5.294730186462402


Processing epoch 00:  28%|██▊       | 1366/4850 [06:11<15:41,  3.70it/s]

Epoch: 1, Loss: 5.514893531799316


Processing epoch 00:  28%|██▊       | 1367/4850 [06:12<15:39,  3.71it/s]

Epoch: 1, Loss: 4.269465923309326


Processing epoch 00:  28%|██▊       | 1368/4850 [06:12<15:40,  3.70it/s]

Epoch: 1, Loss: 5.182687759399414


Processing epoch 00:  28%|██▊       | 1369/4850 [06:12<15:56,  3.64it/s]

Epoch: 1, Loss: 3.6384823322296143


Processing epoch 00:  28%|██▊       | 1370/4850 [06:12<15:51,  3.66it/s]

Epoch: 1, Loss: 4.423013210296631


Processing epoch 00:  28%|██▊       | 1371/4850 [06:13<15:48,  3.67it/s]

Epoch: 1, Loss: 3.915562152862549


Processing epoch 00:  28%|██▊       | 1372/4850 [06:13<15:43,  3.69it/s]

Epoch: 1, Loss: 4.26384973526001


Processing epoch 00:  28%|██▊       | 1373/4850 [06:13<15:35,  3.72it/s]

Epoch: 1, Loss: 5.377852439880371


Processing epoch 00:  28%|██▊       | 1374/4850 [06:13<15:37,  3.71it/s]

Epoch: 1, Loss: 4.485713958740234


Processing epoch 00:  28%|██▊       | 1375/4850 [06:14<15:39,  3.70it/s]

Epoch: 1, Loss: 5.602009296417236


Processing epoch 00:  28%|██▊       | 1376/4850 [06:14<15:39,  3.70it/s]

Epoch: 1, Loss: 5.070417404174805


Processing epoch 00:  28%|██▊       | 1377/4850 [06:14<15:39,  3.70it/s]

Epoch: 1, Loss: 5.2248663902282715


Processing epoch 00:  28%|██▊       | 1378/4850 [06:15<15:41,  3.69it/s]

Epoch: 1, Loss: 5.070827960968018


Processing epoch 00:  28%|██▊       | 1379/4850 [06:15<15:44,  3.68it/s]

Epoch: 1, Loss: 4.958656311035156


Processing epoch 00:  28%|██▊       | 1380/4850 [06:15<15:51,  3.65it/s]

Epoch: 1, Loss: 4.5425543785095215


Processing epoch 00:  28%|██▊       | 1381/4850 [06:15<15:49,  3.66it/s]

Epoch: 1, Loss: 3.8139076232910156


Processing epoch 00:  28%|██▊       | 1382/4850 [06:16<15:39,  3.69it/s]

Epoch: 1, Loss: 5.546425819396973


Processing epoch 00:  29%|██▊       | 1383/4850 [06:16<15:34,  3.71it/s]

Epoch: 1, Loss: 5.4894819259643555


Processing epoch 00:  29%|██▊       | 1384/4850 [06:16<15:38,  3.69it/s]

Epoch: 1, Loss: 5.0024919509887695


Processing epoch 00:  29%|██▊       | 1385/4850 [06:16<15:39,  3.69it/s]

Epoch: 1, Loss: 4.320296287536621


Processing epoch 00:  29%|██▊       | 1386/4850 [06:17<15:41,  3.68it/s]

Epoch: 1, Loss: 4.663943290710449


Processing epoch 00:  29%|██▊       | 1387/4850 [06:17<15:36,  3.70it/s]

Epoch: 1, Loss: 4.608561038970947


Processing epoch 00:  29%|██▊       | 1388/4850 [06:17<15:32,  3.71it/s]

Epoch: 1, Loss: 5.005163192749023


Processing epoch 00:  29%|██▊       | 1389/4850 [06:18<15:34,  3.70it/s]

Epoch: 1, Loss: 4.825756549835205


Processing epoch 00:  29%|██▊       | 1390/4850 [06:18<15:36,  3.70it/s]

Epoch: 1, Loss: 5.101475715637207


Processing epoch 00:  29%|██▊       | 1391/4850 [06:18<15:36,  3.69it/s]

Epoch: 1, Loss: 4.075628757476807


Processing epoch 00:  29%|██▊       | 1392/4850 [06:18<15:37,  3.69it/s]

Epoch: 1, Loss: 3.7632033824920654


Processing epoch 00:  29%|██▊       | 1393/4850 [06:19<15:34,  3.70it/s]

Epoch: 1, Loss: 4.438081741333008


Processing epoch 00:  29%|██▊       | 1394/4850 [06:19<15:34,  3.70it/s]

Epoch: 1, Loss: 4.847828388214111


Processing epoch 00:  29%|██▉       | 1395/4850 [06:19<15:35,  3.69it/s]

Epoch: 1, Loss: 5.004948616027832


Processing epoch 00:  29%|██▉       | 1396/4850 [06:19<15:29,  3.71it/s]

Epoch: 1, Loss: 5.003357410430908


Processing epoch 00:  29%|██▉       | 1397/4850 [06:20<15:36,  3.69it/s]

Epoch: 1, Loss: 4.394396781921387


Processing epoch 00:  29%|██▉       | 1398/4850 [06:20<15:36,  3.69it/s]

Epoch: 1, Loss: 5.509718894958496


Processing epoch 00:  29%|██▉       | 1399/4850 [06:20<15:58,  3.60it/s]

Epoch: 1, Loss: 4.334081649780273


Processing epoch 00:  29%|██▉       | 1400/4850 [06:21<16:08,  3.56it/s]

Epoch: 1, Loss: 4.402023792266846


Processing epoch 00:  29%|██▉       | 1401/4850 [06:21<16:17,  3.53it/s]

Epoch: 1, Loss: 4.091170310974121


Processing epoch 00:  29%|██▉       | 1402/4850 [06:21<16:23,  3.50it/s]

Epoch: 1, Loss: 5.122076034545898


Processing epoch 00:  29%|██▉       | 1403/4850 [06:21<16:27,  3.49it/s]

Epoch: 1, Loss: 4.817199230194092


Processing epoch 00:  29%|██▉       | 1404/4850 [06:22<16:29,  3.48it/s]

Epoch: 1, Loss: 4.370665550231934


Processing epoch 00:  29%|██▉       | 1405/4850 [06:22<16:25,  3.50it/s]

Epoch: 1, Loss: 5.50969123840332


Processing epoch 00:  29%|██▉       | 1406/4850 [06:22<16:22,  3.51it/s]

Epoch: 1, Loss: 4.367687225341797


Processing epoch 00:  29%|██▉       | 1407/4850 [06:23<16:21,  3.51it/s]

Epoch: 1, Loss: 5.170284271240234


Processing epoch 00:  29%|██▉       | 1408/4850 [06:23<16:41,  3.44it/s]

Epoch: 1, Loss: 4.244182586669922


Processing epoch 00:  29%|██▉       | 1409/4850 [06:23<16:17,  3.52it/s]

Epoch: 1, Loss: 5.134398460388184


Processing epoch 00:  29%|██▉       | 1410/4850 [06:23<16:02,  3.57it/s]

Epoch: 1, Loss: 4.872443199157715


Processing epoch 00:  29%|██▉       | 1411/4850 [06:24<15:52,  3.61it/s]

Epoch: 1, Loss: 4.582404613494873


Processing epoch 00:  29%|██▉       | 1412/4850 [06:24<15:40,  3.66it/s]

Epoch: 1, Loss: 5.085495948791504


Processing epoch 00:  29%|██▉       | 1413/4850 [06:24<15:29,  3.70it/s]

Epoch: 1, Loss: 5.224951267242432


Processing epoch 00:  29%|██▉       | 1414/4850 [06:24<15:27,  3.70it/s]

Epoch: 1, Loss: 4.026226043701172


Processing epoch 00:  29%|██▉       | 1415/4850 [06:25<15:32,  3.68it/s]

Epoch: 1, Loss: 4.270191192626953


Processing epoch 00:  29%|██▉       | 1416/4850 [06:25<15:29,  3.69it/s]

Epoch: 1, Loss: 4.909481048583984


Processing epoch 00:  29%|██▉       | 1417/4850 [06:25<15:26,  3.70it/s]

Epoch: 1, Loss: 4.891168117523193


Processing epoch 00:  29%|██▉       | 1418/4850 [06:26<15:33,  3.67it/s]

Epoch: 1, Loss: 4.786587715148926


Processing epoch 00:  29%|██▉       | 1419/4850 [06:26<15:28,  3.69it/s]

Epoch: 1, Loss: 4.711498737335205


Processing epoch 00:  29%|██▉       | 1420/4850 [06:26<15:31,  3.68it/s]

Epoch: 1, Loss: 4.216300964355469


Processing epoch 00:  29%|██▉       | 1421/4850 [06:26<15:28,  3.69it/s]

Epoch: 1, Loss: 3.99892258644104


Processing epoch 00:  29%|██▉       | 1422/4850 [06:27<15:41,  3.64it/s]

Epoch: 1, Loss: 5.496855735778809


Processing epoch 00:  29%|██▉       | 1423/4850 [06:27<15:48,  3.61it/s]

Epoch: 1, Loss: 4.869834899902344


Processing epoch 00:  29%|██▉       | 1424/4850 [06:27<16:09,  3.53it/s]

Epoch: 1, Loss: 4.797908782958984


Processing epoch 00:  29%|██▉       | 1425/4850 [06:27<16:15,  3.51it/s]

Epoch: 1, Loss: 4.527626991271973


Processing epoch 00:  29%|██▉       | 1426/4850 [06:28<16:16,  3.51it/s]

Epoch: 1, Loss: 4.790170669555664


Processing epoch 00:  29%|██▉       | 1427/4850 [06:28<16:21,  3.49it/s]

Epoch: 1, Loss: 4.427012920379639


Processing epoch 00:  29%|██▉       | 1428/4850 [06:28<16:17,  3.50it/s]

Epoch: 1, Loss: 5.611067295074463


Processing epoch 00:  29%|██▉       | 1429/4850 [06:29<16:28,  3.46it/s]

Epoch: 1, Loss: 4.461497783660889


Processing epoch 00:  29%|██▉       | 1430/4850 [06:29<16:24,  3.47it/s]

Epoch: 1, Loss: 4.527435302734375


Processing epoch 00:  30%|██▉       | 1431/4850 [06:29<16:28,  3.46it/s]

Epoch: 1, Loss: 4.529035568237305


Processing epoch 00:  30%|██▉       | 1432/4850 [06:30<16:11,  3.52it/s]

Epoch: 1, Loss: 4.227417469024658


Processing epoch 00:  30%|██▉       | 1433/4850 [06:30<16:05,  3.54it/s]

Epoch: 1, Loss: 4.365251541137695


Processing epoch 00:  30%|██▉       | 1434/4850 [06:30<15:53,  3.58it/s]

Epoch: 1, Loss: 5.2966156005859375


Processing epoch 00:  30%|██▉       | 1435/4850 [06:30<15:44,  3.61it/s]

Epoch: 1, Loss: 3.7664923667907715


Processing epoch 00:  30%|██▉       | 1436/4850 [06:31<15:36,  3.65it/s]

Epoch: 1, Loss: 4.721244812011719


Processing epoch 00:  30%|██▉       | 1437/4850 [06:31<15:34,  3.65it/s]

Epoch: 1, Loss: 5.564323902130127


Processing epoch 00:  30%|██▉       | 1438/4850 [06:31<15:27,  3.68it/s]

Epoch: 1, Loss: 4.5396318435668945


Processing epoch 00:  30%|██▉       | 1439/4850 [06:31<15:26,  3.68it/s]

Epoch: 1, Loss: 4.189397811889648


Processing epoch 00:  30%|██▉       | 1440/4850 [06:32<15:27,  3.68it/s]

Epoch: 1, Loss: 3.964454174041748


Processing epoch 00:  30%|██▉       | 1441/4850 [06:32<15:28,  3.67it/s]

Epoch: 1, Loss: 4.575829029083252


Processing epoch 00:  30%|██▉       | 1442/4850 [06:32<15:26,  3.68it/s]

Epoch: 1, Loss: 4.383695125579834


Processing epoch 00:  30%|██▉       | 1443/4850 [06:32<15:25,  3.68it/s]

Epoch: 1, Loss: 5.128504276275635


Processing epoch 00:  30%|██▉       | 1444/4850 [06:33<15:23,  3.69it/s]

Epoch: 1, Loss: 4.596780300140381


Processing epoch 00:  30%|██▉       | 1445/4850 [06:33<15:50,  3.58it/s]

Epoch: 1, Loss: 4.705165863037109


Processing epoch 00:  30%|██▉       | 1446/4850 [06:33<15:52,  3.57it/s]

Epoch: 1, Loss: 4.68366813659668


Processing epoch 00:  30%|██▉       | 1447/4850 [06:34<16:01,  3.54it/s]

Epoch: 1, Loss: 4.558540344238281


Processing epoch 00:  30%|██▉       | 1448/4850 [06:34<16:08,  3.51it/s]

Epoch: 1, Loss: 4.477832794189453


Processing epoch 00:  30%|██▉       | 1449/4850 [06:34<16:05,  3.52it/s]

Epoch: 1, Loss: 4.021292209625244


Processing epoch 00:  30%|██▉       | 1450/4850 [06:34<16:19,  3.47it/s]

Epoch: 1, Loss: 4.200115203857422


Processing epoch 00:  30%|██▉       | 1451/4850 [06:35<16:17,  3.48it/s]

Epoch: 1, Loss: 5.030037879943848


Processing epoch 00:  30%|██▉       | 1452/4850 [06:35<16:32,  3.42it/s]

Epoch: 1, Loss: 5.230790615081787


Processing epoch 00:  30%|██▉       | 1453/4850 [06:35<16:34,  3.42it/s]

Epoch: 1, Loss: 4.06099796295166


Processing epoch 00:  30%|██▉       | 1454/4850 [06:36<16:27,  3.44it/s]

Epoch: 1, Loss: 4.31793212890625


Processing epoch 00:  30%|███       | 1455/4850 [06:36<16:04,  3.52it/s]

Epoch: 1, Loss: 4.431432723999023


Processing epoch 00:  30%|███       | 1456/4850 [06:36<15:50,  3.57it/s]

Epoch: 1, Loss: 3.9195048809051514


Processing epoch 00:  30%|███       | 1457/4850 [06:36<15:37,  3.62it/s]

Epoch: 1, Loss: 5.05050802230835


Processing epoch 00:  30%|███       | 1458/4850 [06:37<15:31,  3.64it/s]

Epoch: 1, Loss: 5.0008440017700195


Processing epoch 00:  30%|███       | 1459/4850 [06:37<15:28,  3.65it/s]

Epoch: 1, Loss: 5.611539840698242


Processing epoch 00:  30%|███       | 1460/4850 [06:37<15:25,  3.66it/s]

Epoch: 1, Loss: 4.343103885650635


Processing epoch 00:  30%|███       | 1461/4850 [06:38<15:21,  3.68it/s]

Epoch: 1, Loss: 3.9465532302856445


Processing epoch 00:  30%|███       | 1462/4850 [06:38<15:20,  3.68it/s]

Epoch: 1, Loss: 4.258708953857422


Processing epoch 00:  30%|███       | 1463/4850 [06:38<15:22,  3.67it/s]

Epoch: 1, Loss: 4.426233291625977


Processing epoch 00:  30%|███       | 1464/4850 [06:38<15:18,  3.69it/s]

Epoch: 1, Loss: 4.278494834899902


Processing epoch 00:  30%|███       | 1465/4850 [06:39<15:17,  3.69it/s]

Epoch: 1, Loss: 4.970795631408691


Processing epoch 00:  30%|███       | 1466/4850 [06:39<15:15,  3.70it/s]

Epoch: 1, Loss: 4.475362300872803


Processing epoch 00:  30%|███       | 1467/4850 [06:39<15:14,  3.70it/s]

Epoch: 1, Loss: 4.359534740447998


Processing epoch 00:  30%|███       | 1468/4850 [06:39<15:18,  3.68it/s]

Epoch: 1, Loss: 3.8879594802856445


Processing epoch 00:  30%|███       | 1469/4850 [06:40<15:17,  3.69it/s]

Epoch: 1, Loss: 4.54547119140625


Processing epoch 00:  30%|███       | 1470/4850 [06:40<15:16,  3.69it/s]

Epoch: 1, Loss: 5.145328044891357


Processing epoch 00:  30%|███       | 1471/4850 [06:40<15:14,  3.69it/s]

Epoch: 1, Loss: 4.782604217529297


Processing epoch 00:  30%|███       | 1472/4850 [06:41<15:23,  3.66it/s]

Epoch: 1, Loss: 3.8401081562042236


Processing epoch 00:  30%|███       | 1473/4850 [06:41<15:21,  3.66it/s]

Epoch: 1, Loss: 5.0790557861328125


Processing epoch 00:  30%|███       | 1474/4850 [06:41<15:21,  3.66it/s]

Epoch: 1, Loss: 4.818512439727783


Processing epoch 00:  30%|███       | 1475/4850 [06:41<15:15,  3.69it/s]

Epoch: 1, Loss: 3.8564960956573486


Processing epoch 00:  30%|███       | 1476/4850 [06:42<15:12,  3.70it/s]

Epoch: 1, Loss: 4.949258804321289


Processing epoch 00:  30%|███       | 1477/4850 [06:42<15:13,  3.69it/s]

Epoch: 1, Loss: 3.9329309463500977


Processing epoch 00:  30%|███       | 1478/4850 [06:42<15:17,  3.67it/s]

Epoch: 1, Loss: 4.019916534423828


Processing epoch 00:  30%|███       | 1479/4850 [06:42<15:11,  3.70it/s]

Epoch: 1, Loss: 5.301203727722168


Processing epoch 00:  31%|███       | 1480/4850 [06:43<15:09,  3.71it/s]

Epoch: 1, Loss: 4.287010669708252


Processing epoch 00:  31%|███       | 1481/4850 [06:43<15:07,  3.71it/s]

Epoch: 1, Loss: 3.821239471435547


Processing epoch 00:  31%|███       | 1482/4850 [06:43<15:06,  3.72it/s]

Epoch: 1, Loss: 5.209427356719971


Processing epoch 00:  31%|███       | 1483/4850 [06:44<15:02,  3.73it/s]

Epoch: 1, Loss: 5.249518394470215


Processing epoch 00:  31%|███       | 1484/4850 [06:44<15:03,  3.72it/s]

Epoch: 1, Loss: 4.334674835205078


Processing epoch 00:  31%|███       | 1485/4850 [06:44<15:06,  3.71it/s]

Epoch: 1, Loss: 3.5451676845550537


Processing epoch 00:  31%|███       | 1486/4850 [06:44<15:04,  3.72it/s]

Epoch: 1, Loss: 4.404843807220459


Processing epoch 00:  31%|███       | 1487/4850 [06:45<15:05,  3.71it/s]

Epoch: 1, Loss: 4.169680595397949


Processing epoch 00:  31%|███       | 1488/4850 [06:45<15:05,  3.71it/s]

Epoch: 1, Loss: 4.983238220214844


Processing epoch 00:  31%|███       | 1489/4850 [06:45<15:05,  3.71it/s]

Epoch: 1, Loss: 4.385210990905762


Processing epoch 00:  31%|███       | 1490/4850 [06:45<15:07,  3.70it/s]

Epoch: 1, Loss: 5.476739406585693


Processing epoch 00:  31%|███       | 1491/4850 [06:46<15:21,  3.65it/s]

Epoch: 1, Loss: 4.255387783050537


Processing epoch 00:  31%|███       | 1492/4850 [06:46<15:34,  3.60it/s]

Epoch: 1, Loss: 4.575027942657471


Processing epoch 00:  31%|███       | 1493/4850 [06:46<15:39,  3.57it/s]

Epoch: 1, Loss: 5.013038635253906


Processing epoch 00:  31%|███       | 1494/4850 [06:47<15:50,  3.53it/s]

Epoch: 1, Loss: 4.174574851989746


Processing epoch 00:  31%|███       | 1495/4850 [06:47<15:53,  3.52it/s]

Epoch: 1, Loss: 4.412761211395264


Processing epoch 00:  31%|███       | 1496/4850 [06:47<16:11,  3.45it/s]

Epoch: 1, Loss: 4.667024612426758


Processing epoch 00:  31%|███       | 1497/4850 [06:47<16:11,  3.45it/s]

Epoch: 1, Loss: 4.725730895996094


Processing epoch 00:  31%|███       | 1498/4850 [06:48<16:15,  3.44it/s]

Epoch: 1, Loss: 3.7457010746002197


Processing epoch 00:  31%|███       | 1499/4850 [06:48<16:13,  3.44it/s]

Epoch: 1, Loss: 5.16552209854126


Processing epoch 00:  31%|███       | 1500/4850 [06:48<16:05,  3.47it/s]

Epoch: 1, Loss: 3.6856188774108887


Processing epoch 00:  31%|███       | 1501/4850 [06:49<16:05,  3.47it/s]

Epoch: 1, Loss: 5.777533531188965


Processing epoch 00:  31%|███       | 1502/4850 [06:49<15:47,  3.53it/s]

Epoch: 1, Loss: 4.579617500305176


Processing epoch 00:  31%|███       | 1503/4850 [06:49<15:31,  3.59it/s]

Epoch: 1, Loss: 4.456658363342285


Processing epoch 00:  31%|███       | 1504/4850 [06:49<15:27,  3.61it/s]

Epoch: 1, Loss: 5.59438419342041


Processing epoch 00:  31%|███       | 1505/4850 [06:50<15:19,  3.64it/s]

Epoch: 1, Loss: 5.44392204284668


Processing epoch 00:  31%|███       | 1506/4850 [06:50<15:13,  3.66it/s]

Epoch: 1, Loss: 4.421792030334473


Processing epoch 00:  31%|███       | 1507/4850 [06:50<15:08,  3.68it/s]

Epoch: 1, Loss: 5.212339878082275


Processing epoch 00:  31%|███       | 1508/4850 [06:50<15:06,  3.69it/s]

Epoch: 1, Loss: 4.1964921951293945


Processing epoch 00:  31%|███       | 1509/4850 [06:51<15:02,  3.70it/s]

Epoch: 1, Loss: 5.113640785217285


Processing epoch 00:  31%|███       | 1510/4850 [06:51<15:04,  3.69it/s]

Epoch: 1, Loss: 4.03125


Processing epoch 00:  31%|███       | 1511/4850 [06:51<15:00,  3.71it/s]

Epoch: 1, Loss: 4.412103176116943


Processing epoch 00:  31%|███       | 1512/4850 [06:52<15:02,  3.70it/s]

Epoch: 1, Loss: 4.752934455871582


Processing epoch 00:  31%|███       | 1513/4850 [06:52<15:03,  3.69it/s]

Epoch: 1, Loss: 4.493312835693359


Processing epoch 00:  31%|███       | 1514/4850 [06:52<15:02,  3.70it/s]

Epoch: 1, Loss: 4.883109092712402


Processing epoch 00:  31%|███       | 1515/4850 [06:52<15:02,  3.69it/s]

Epoch: 1, Loss: 4.359343528747559


Processing epoch 00:  31%|███▏      | 1516/4850 [06:53<15:03,  3.69it/s]

Epoch: 1, Loss: 4.590113162994385


Processing epoch 00:  31%|███▏      | 1517/4850 [06:53<15:05,  3.68it/s]

Epoch: 1, Loss: 3.5590434074401855


Processing epoch 00:  31%|███▏      | 1518/4850 [06:53<15:06,  3.68it/s]

Epoch: 1, Loss: 4.080358028411865


Processing epoch 00:  31%|███▏      | 1519/4850 [06:53<15:10,  3.66it/s]

Epoch: 1, Loss: 5.459891319274902


Processing epoch 00:  31%|███▏      | 1520/4850 [06:54<15:06,  3.67it/s]

Epoch: 1, Loss: 4.326309680938721


Processing epoch 00:  31%|███▏      | 1521/4850 [06:54<15:00,  3.69it/s]

Epoch: 1, Loss: 5.104175567626953


Processing epoch 00:  31%|███▏      | 1522/4850 [06:54<15:03,  3.68it/s]

Epoch: 1, Loss: 4.773956775665283


Processing epoch 00:  31%|███▏      | 1523/4850 [06:55<15:02,  3.69it/s]

Epoch: 1, Loss: 3.965453863143921


Processing epoch 00:  31%|███▏      | 1524/4850 [06:55<15:00,  3.69it/s]

Epoch: 1, Loss: 5.0998640060424805


Processing epoch 00:  31%|███▏      | 1525/4850 [06:55<15:01,  3.69it/s]

Epoch: 1, Loss: 3.9000964164733887


Processing epoch 00:  31%|███▏      | 1526/4850 [06:55<15:01,  3.69it/s]

Epoch: 1, Loss: 4.2262983322143555


Processing epoch 00:  31%|███▏      | 1527/4850 [06:56<15:01,  3.69it/s]

Epoch: 1, Loss: 4.158618927001953


Processing epoch 00:  32%|███▏      | 1528/4850 [06:56<15:03,  3.68it/s]

Epoch: 1, Loss: 4.899013996124268


Processing epoch 00:  32%|███▏      | 1529/4850 [06:56<14:59,  3.69it/s]

Epoch: 1, Loss: 4.472115516662598


Processing epoch 00:  32%|███▏      | 1530/4850 [06:56<15:00,  3.69it/s]

Epoch: 1, Loss: 4.212349891662598


Processing epoch 00:  32%|███▏      | 1531/4850 [06:57<14:55,  3.71it/s]

Epoch: 1, Loss: 5.015836238861084


Processing epoch 00:  32%|███▏      | 1532/4850 [06:57<14:50,  3.73it/s]

Epoch: 1, Loss: 4.702542781829834


Processing epoch 00:  32%|███▏      | 1533/4850 [06:57<14:50,  3.72it/s]

Epoch: 1, Loss: 3.6385488510131836


Processing epoch 00:  32%|███▏      | 1534/4850 [06:58<14:47,  3.74it/s]

Epoch: 1, Loss: 5.073284149169922


Processing epoch 00:  32%|███▏      | 1535/4850 [06:58<14:47,  3.74it/s]

Epoch: 1, Loss: 4.501036643981934


Processing epoch 00:  32%|███▏      | 1536/4850 [06:58<14:50,  3.72it/s]

Epoch: 1, Loss: 4.290459632873535


Processing epoch 00:  32%|███▏      | 1537/4850 [06:58<14:50,  3.72it/s]

Epoch: 1, Loss: 3.525129795074463


Processing epoch 00:  32%|███▏      | 1538/4850 [06:59<15:00,  3.68it/s]

Epoch: 1, Loss: 4.407433032989502


Processing epoch 00:  32%|███▏      | 1539/4850 [06:59<15:11,  3.63it/s]

Epoch: 1, Loss: 4.68226432800293


Processing epoch 00:  32%|███▏      | 1540/4850 [06:59<15:25,  3.58it/s]

Epoch: 1, Loss: 4.812083721160889


Processing epoch 00:  32%|███▏      | 1541/4850 [06:59<15:31,  3.55it/s]

Epoch: 1, Loss: 3.9866199493408203


Processing epoch 00:  32%|███▏      | 1542/4850 [07:00<15:32,  3.55it/s]

Epoch: 1, Loss: 4.760223865509033


Processing epoch 00:  32%|███▏      | 1543/4850 [07:00<15:32,  3.54it/s]

Epoch: 1, Loss: 4.971364498138428


Processing epoch 00:  32%|███▏      | 1544/4850 [07:00<15:42,  3.51it/s]

Epoch: 1, Loss: 4.3502702713012695


Processing epoch 00:  32%|███▏      | 1545/4850 [07:01<15:42,  3.51it/s]

Epoch: 1, Loss: 5.041321277618408


Processing epoch 00:  32%|███▏      | 1546/4850 [07:01<15:46,  3.49it/s]

Epoch: 1, Loss: 4.224505424499512


Processing epoch 00:  32%|███▏      | 1547/4850 [07:01<15:51,  3.47it/s]

Epoch: 1, Loss: 4.48604154586792


Processing epoch 00:  32%|███▏      | 1548/4850 [07:01<16:11,  3.40it/s]

Epoch: 1, Loss: 5.053517818450928


Processing epoch 00:  32%|███▏      | 1549/4850 [07:02<15:48,  3.48it/s]

Epoch: 1, Loss: 4.284463882446289


Processing epoch 00:  32%|███▏      | 1550/4850 [07:02<15:29,  3.55it/s]

Epoch: 1, Loss: 4.650393486022949


Processing epoch 00:  32%|███▏      | 1551/4850 [07:02<15:19,  3.59it/s]

Epoch: 1, Loss: 4.312524795532227


Processing epoch 00:  32%|███▏      | 1552/4850 [07:03<15:13,  3.61it/s]

Epoch: 1, Loss: 3.66239595413208


Processing epoch 00:  32%|███▏      | 1553/4850 [07:03<15:07,  3.63it/s]

Epoch: 1, Loss: 4.082096576690674


Processing epoch 00:  32%|███▏      | 1554/4850 [07:03<15:03,  3.65it/s]

Epoch: 1, Loss: 4.018605709075928


Processing epoch 00:  32%|███▏      | 1555/4850 [07:03<14:58,  3.67it/s]

Epoch: 1, Loss: 4.11201810836792


Processing epoch 00:  32%|███▏      | 1556/4850 [07:04<14:58,  3.67it/s]

Epoch: 1, Loss: 4.47119140625


Processing epoch 00:  32%|███▏      | 1557/4850 [07:04<14:55,  3.68it/s]

Epoch: 1, Loss: 4.627860069274902


Processing epoch 00:  32%|███▏      | 1558/4850 [07:04<14:49,  3.70it/s]

Epoch: 1, Loss: 4.8757405281066895


Processing epoch 00:  32%|███▏      | 1559/4850 [07:04<14:46,  3.71it/s]

Epoch: 1, Loss: 4.941840648651123


Processing epoch 00:  32%|███▏      | 1560/4850 [07:05<14:53,  3.68it/s]

Epoch: 1, Loss: 4.355687618255615


Processing epoch 00:  32%|███▏      | 1561/4850 [07:05<14:47,  3.70it/s]

Epoch: 1, Loss: 5.356299877166748


Processing epoch 00:  32%|███▏      | 1562/4850 [07:05<14:46,  3.71it/s]

Epoch: 1, Loss: 4.906673431396484


Processing epoch 00:  32%|███▏      | 1563/4850 [07:06<14:47,  3.70it/s]

Epoch: 1, Loss: 4.363072395324707


Processing epoch 00:  32%|███▏      | 1564/4850 [07:06<14:56,  3.67it/s]

Epoch: 1, Loss: 4.8045654296875


Processing epoch 00:  32%|███▏      | 1565/4850 [07:06<14:53,  3.68it/s]

Epoch: 1, Loss: 4.909668922424316


Processing epoch 00:  32%|███▏      | 1566/4850 [07:06<14:52,  3.68it/s]

Epoch: 1, Loss: 5.12706995010376


Processing epoch 00:  32%|███▏      | 1567/4850 [07:07<14:49,  3.69it/s]

Epoch: 1, Loss: 4.7121405601501465


Processing epoch 00:  32%|███▏      | 1568/4850 [07:07<14:49,  3.69it/s]

Epoch: 1, Loss: 3.6925265789031982


Processing epoch 00:  32%|███▏      | 1569/4850 [07:07<14:46,  3.70it/s]

Epoch: 1, Loss: 4.0906500816345215


Processing epoch 00:  32%|███▏      | 1570/4850 [07:07<14:45,  3.71it/s]

Epoch: 1, Loss: 4.406220436096191


Processing epoch 00:  32%|███▏      | 1571/4850 [07:08<14:41,  3.72it/s]

Epoch: 1, Loss: 4.8193488121032715


Processing epoch 00:  32%|███▏      | 1572/4850 [07:08<14:42,  3.71it/s]

Epoch: 1, Loss: 4.868703842163086


Processing epoch 00:  32%|███▏      | 1573/4850 [07:08<14:41,  3.72it/s]

Epoch: 1, Loss: 4.184208869934082


Processing epoch 00:  32%|███▏      | 1574/4850 [07:09<14:38,  3.73it/s]

Epoch: 1, Loss: 4.13732385635376


Processing epoch 00:  32%|███▏      | 1575/4850 [07:09<14:43,  3.71it/s]

Epoch: 1, Loss: 3.9795937538146973


Processing epoch 00:  32%|███▏      | 1576/4850 [07:09<14:43,  3.71it/s]

Epoch: 1, Loss: 4.032573699951172


Processing epoch 00:  33%|███▎      | 1577/4850 [07:09<14:41,  3.71it/s]

Epoch: 1, Loss: 4.30722188949585


Processing epoch 00:  33%|███▎      | 1578/4850 [07:10<14:40,  3.71it/s]

Epoch: 1, Loss: 4.651900291442871


Processing epoch 00:  33%|███▎      | 1579/4850 [07:10<14:46,  3.69it/s]

Epoch: 1, Loss: 3.8463640213012695


Processing epoch 00:  33%|███▎      | 1580/4850 [07:10<14:44,  3.70it/s]

Epoch: 1, Loss: 4.3355560302734375


Processing epoch 00:  33%|███▎      | 1581/4850 [07:10<14:43,  3.70it/s]

Epoch: 1, Loss: 4.1512370109558105


Processing epoch 00:  33%|███▎      | 1582/4850 [07:11<14:47,  3.68it/s]

Epoch: 1, Loss: 3.8395347595214844


Processing epoch 00:  33%|███▎      | 1583/4850 [07:11<14:55,  3.65it/s]

Epoch: 1, Loss: 4.253185749053955


Processing epoch 00:  33%|███▎      | 1584/4850 [07:11<14:48,  3.67it/s]

Epoch: 1, Loss: 4.712861061096191


Processing epoch 00:  33%|███▎      | 1585/4850 [07:11<14:45,  3.69it/s]

Epoch: 1, Loss: 4.291106224060059


Processing epoch 00:  33%|███▎      | 1586/4850 [07:12<14:58,  3.63it/s]

Epoch: 1, Loss: 4.537522315979004


Processing epoch 00:  33%|███▎      | 1587/4850 [07:12<15:08,  3.59it/s]

Epoch: 1, Loss: 5.510778903961182


Processing epoch 00:  33%|███▎      | 1588/4850 [07:12<15:13,  3.57it/s]

Epoch: 1, Loss: 3.953321933746338


Processing epoch 00:  33%|███▎      | 1589/4850 [07:13<15:18,  3.55it/s]

Epoch: 1, Loss: 4.196228981018066


Processing epoch 00:  33%|███▎      | 1590/4850 [07:13<15:19,  3.55it/s]

Epoch: 1, Loss: 5.073782920837402


Processing epoch 00:  33%|███▎      | 1591/4850 [07:13<15:44,  3.45it/s]

Epoch: 1, Loss: 4.923538684844971


Processing epoch 00:  33%|███▎      | 1592/4850 [07:14<15:40,  3.46it/s]

Epoch: 1, Loss: 3.7526497840881348


Processing epoch 00:  33%|███▎      | 1593/4850 [07:14<15:57,  3.40it/s]

Epoch: 1, Loss: 4.694293022155762


Processing epoch 00:  33%|███▎      | 1594/4850 [07:14<16:01,  3.39it/s]

Epoch: 1, Loss: 4.736227512359619


Processing epoch 00:  33%|███▎      | 1595/4850 [07:14<15:50,  3.42it/s]

Epoch: 1, Loss: 4.9870285987854


Processing epoch 00:  33%|███▎      | 1596/4850 [07:15<15:31,  3.49it/s]

Epoch: 1, Loss: 5.000306606292725


Processing epoch 00:  33%|███▎      | 1597/4850 [07:15<15:16,  3.55it/s]

Epoch: 1, Loss: 4.567390441894531


Processing epoch 00:  33%|███▎      | 1598/4850 [07:15<15:05,  3.59it/s]

Epoch: 1, Loss: 4.9449920654296875


Processing epoch 00:  33%|███▎      | 1599/4850 [07:15<14:57,  3.62it/s]

Epoch: 1, Loss: 4.358870506286621


Processing epoch 00:  33%|███▎      | 1600/4850 [07:16<14:51,  3.65it/s]

Epoch: 1, Loss: 4.481182098388672


Processing epoch 00:  33%|███▎      | 1601/4850 [07:16<14:47,  3.66it/s]

Epoch: 1, Loss: 4.2694807052612305


Processing epoch 00:  33%|███▎      | 1602/4850 [07:16<14:44,  3.67it/s]

Epoch: 1, Loss: 4.156671524047852


Processing epoch 00:  33%|███▎      | 1603/4850 [07:17<14:44,  3.67it/s]

Epoch: 1, Loss: 4.818758010864258


Processing epoch 00:  33%|███▎      | 1604/4850 [07:17<14:42,  3.68it/s]

Epoch: 1, Loss: 5.1838860511779785


Processing epoch 00:  33%|███▎      | 1605/4850 [07:17<14:45,  3.66it/s]

Epoch: 1, Loss: 3.4100871086120605


Processing epoch 00:  33%|███▎      | 1606/4850 [07:17<14:42,  3.67it/s]

Epoch: 1, Loss: 4.21269416809082


Processing epoch 00:  33%|███▎      | 1607/4850 [07:18<14:39,  3.69it/s]

Epoch: 1, Loss: 4.046887397766113


Processing epoch 00:  33%|███▎      | 1608/4850 [07:18<14:38,  3.69it/s]

Epoch: 1, Loss: 4.96299409866333


Processing epoch 00:  33%|███▎      | 1609/4850 [07:18<14:41,  3.68it/s]

Epoch: 1, Loss: 6.143224716186523


Processing epoch 00:  33%|███▎      | 1610/4850 [07:18<14:41,  3.68it/s]

Epoch: 1, Loss: 4.092713832855225


Processing epoch 00:  33%|███▎      | 1611/4850 [07:19<14:40,  3.68it/s]

Epoch: 1, Loss: 4.238770484924316


Processing epoch 00:  33%|███▎      | 1612/4850 [07:19<14:34,  3.70it/s]

Epoch: 1, Loss: 5.257903099060059


Processing epoch 00:  33%|███▎      | 1613/4850 [07:19<14:34,  3.70it/s]

Epoch: 1, Loss: 4.82163143157959


Processing epoch 00:  33%|███▎      | 1614/4850 [07:20<14:39,  3.68it/s]

Epoch: 1, Loss: 4.264842510223389


Processing epoch 00:  33%|███▎      | 1615/4850 [07:20<14:38,  3.68it/s]

Epoch: 1, Loss: 4.457061767578125


Processing epoch 00:  33%|███▎      | 1616/4850 [07:20<14:37,  3.68it/s]

Epoch: 1, Loss: 3.993598461151123


Processing epoch 00:  33%|███▎      | 1617/4850 [07:20<14:37,  3.69it/s]

Epoch: 1, Loss: 5.0681562423706055


Processing epoch 00:  33%|███▎      | 1618/4850 [07:21<14:34,  3.70it/s]

Epoch: 1, Loss: 4.638191223144531


Processing epoch 00:  33%|███▎      | 1619/4850 [07:21<14:33,  3.70it/s]

Epoch: 1, Loss: 4.002812385559082


Processing epoch 00:  33%|███▎      | 1620/4850 [07:21<14:35,  3.69it/s]

Epoch: 1, Loss: 4.3087615966796875


Processing epoch 00:  33%|███▎      | 1621/4850 [07:21<14:37,  3.68it/s]

Epoch: 1, Loss: 4.311079025268555


Processing epoch 00:  33%|███▎      | 1622/4850 [07:22<14:34,  3.69it/s]

Epoch: 1, Loss: 4.371469020843506


Processing epoch 00:  33%|███▎      | 1623/4850 [07:22<14:35,  3.69it/s]

Epoch: 1, Loss: 4.299072742462158


Processing epoch 00:  33%|███▎      | 1624/4850 [07:22<14:39,  3.67it/s]

Epoch: 1, Loss: 4.97456693649292


Processing epoch 00:  34%|███▎      | 1625/4850 [07:23<14:44,  3.65it/s]

Epoch: 1, Loss: 4.465646266937256


Processing epoch 00:  34%|███▎      | 1626/4850 [07:23<14:44,  3.65it/s]

Epoch: 1, Loss: 4.551696300506592


Processing epoch 00:  34%|███▎      | 1627/4850 [07:23<14:38,  3.67it/s]

Epoch: 1, Loss: 4.899311065673828


Processing epoch 00:  34%|███▎      | 1628/4850 [07:23<14:32,  3.69it/s]

Epoch: 1, Loss: 5.342759132385254


Processing epoch 00:  34%|███▎      | 1629/4850 [07:24<14:30,  3.70it/s]

Epoch: 1, Loss: 5.551975727081299


Processing epoch 00:  34%|███▎      | 1630/4850 [07:24<14:28,  3.71it/s]

Epoch: 1, Loss: 3.872241973876953


Processing epoch 00:  34%|███▎      | 1631/4850 [07:24<14:32,  3.69it/s]

Epoch: 1, Loss: 3.4390273094177246


Processing epoch 00:  34%|███▎      | 1632/4850 [07:24<14:55,  3.59it/s]

Epoch: 1, Loss: 3.8268628120422363


Processing epoch 00:  34%|███▎      | 1633/4850 [07:25<15:13,  3.52it/s]

Epoch: 1, Loss: 5.003623008728027


Processing epoch 00:  34%|███▎      | 1634/4850 [07:25<15:20,  3.49it/s]

Epoch: 1, Loss: 4.214570045471191


Processing epoch 00:  34%|███▎      | 1635/4850 [07:25<15:17,  3.50it/s]

Epoch: 1, Loss: 5.814445972442627


Processing epoch 00:  34%|███▎      | 1636/4850 [07:26<15:16,  3.51it/s]

Epoch: 1, Loss: 4.156033039093018


Processing epoch 00:  34%|███▍      | 1637/4850 [07:26<15:20,  3.49it/s]

Epoch: 1, Loss: 4.597209930419922


Processing epoch 00:  34%|███▍      | 1638/4850 [07:26<15:22,  3.48it/s]

Epoch: 1, Loss: 4.498807430267334


Processing epoch 00:  34%|███▍      | 1639/4850 [07:26<15:18,  3.50it/s]

Epoch: 1, Loss: 4.688747882843018


Processing epoch 00:  34%|███▍      | 1640/4850 [07:27<15:29,  3.45it/s]

Epoch: 1, Loss: 3.8079545497894287


Processing epoch 00:  34%|███▍      | 1641/4850 [07:27<15:25,  3.47it/s]

Epoch: 1, Loss: 5.004509925842285


Processing epoch 00:  34%|███▍      | 1642/4850 [07:27<15:07,  3.54it/s]

Epoch: 1, Loss: 4.776603698730469


Processing epoch 00:  34%|███▍      | 1643/4850 [07:28<15:03,  3.55it/s]

Epoch: 1, Loss: 4.294847011566162


Processing epoch 00:  34%|███▍      | 1644/4850 [07:28<14:54,  3.59it/s]

Epoch: 1, Loss: 4.7957000732421875


Processing epoch 00:  34%|███▍      | 1645/4850 [07:28<14:44,  3.62it/s]

Epoch: 1, Loss: 5.218204498291016


Processing epoch 00:  34%|███▍      | 1646/4850 [07:28<14:39,  3.64it/s]

Epoch: 1, Loss: 4.358170032501221


Processing epoch 00:  34%|███▍      | 1647/4850 [07:29<14:33,  3.67it/s]

Epoch: 1, Loss: 5.107828140258789


Processing epoch 00:  34%|███▍      | 1648/4850 [07:29<14:28,  3.69it/s]

Epoch: 1, Loss: 4.776782512664795


Processing epoch 00:  34%|███▍      | 1649/4850 [07:29<14:29,  3.68it/s]

Epoch: 1, Loss: 3.996005058288574


Processing epoch 00:  34%|███▍      | 1650/4850 [07:30<14:35,  3.66it/s]

Epoch: 1, Loss: 4.058137893676758


Processing epoch 00:  34%|███▍      | 1651/4850 [07:30<14:35,  3.65it/s]

Epoch: 1, Loss: 4.4872846603393555


Processing epoch 00:  34%|███▍      | 1652/4850 [07:30<14:36,  3.65it/s]

Epoch: 1, Loss: 3.6978394985198975


Processing epoch 00:  34%|███▍      | 1653/4850 [07:30<14:34,  3.66it/s]

Epoch: 1, Loss: 4.017472743988037


Processing epoch 00:  34%|███▍      | 1654/4850 [07:31<14:35,  3.65it/s]

Epoch: 1, Loss: 4.094000816345215


Processing epoch 00:  34%|███▍      | 1655/4850 [07:31<14:34,  3.65it/s]

Epoch: 1, Loss: 3.432509183883667


Processing epoch 00:  34%|███▍      | 1656/4850 [07:31<14:32,  3.66it/s]

Epoch: 1, Loss: 4.157716274261475


Processing epoch 00:  34%|███▍      | 1657/4850 [07:31<14:31,  3.66it/s]

Epoch: 1, Loss: 4.163465976715088


Processing epoch 00:  34%|███▍      | 1658/4850 [07:32<14:27,  3.68it/s]

Epoch: 1, Loss: 5.102367401123047


Processing epoch 00:  34%|███▍      | 1659/4850 [07:32<14:24,  3.69it/s]

Epoch: 1, Loss: 4.096775054931641


Processing epoch 00:  34%|███▍      | 1660/4850 [07:32<14:24,  3.69it/s]

Epoch: 1, Loss: 4.084300518035889


Processing epoch 00:  34%|███▍      | 1661/4850 [07:33<14:23,  3.69it/s]

Epoch: 1, Loss: 3.3719048500061035


Processing epoch 00:  34%|███▍      | 1662/4850 [07:33<14:31,  3.66it/s]

Epoch: 1, Loss: 3.780590057373047


Processing epoch 00:  34%|███▍      | 1663/4850 [07:33<14:29,  3.66it/s]

Epoch: 1, Loss: 4.260320663452148


Processing epoch 00:  34%|███▍      | 1664/4850 [07:33<14:31,  3.66it/s]

Epoch: 1, Loss: 4.045591831207275


Processing epoch 00:  34%|███▍      | 1665/4850 [07:34<14:32,  3.65it/s]

Epoch: 1, Loss: 4.787148952484131


Processing epoch 00:  34%|███▍      | 1666/4850 [07:34<14:27,  3.67it/s]

Epoch: 1, Loss: 5.291749954223633


Processing epoch 00:  34%|███▍      | 1667/4850 [07:34<14:24,  3.68it/s]

Epoch: 1, Loss: 4.8232927322387695


Processing epoch 00:  34%|███▍      | 1668/4850 [07:34<14:24,  3.68it/s]

Epoch: 1, Loss: 3.9181571006774902


Processing epoch 00:  34%|███▍      | 1669/4850 [07:35<14:22,  3.69it/s]

Epoch: 1, Loss: 4.623737812042236


Processing epoch 00:  34%|███▍      | 1670/4850 [07:35<14:18,  3.70it/s]

Epoch: 1, Loss: 5.13698148727417


Processing epoch 00:  34%|███▍      | 1671/4850 [07:35<14:19,  3.70it/s]

Epoch: 1, Loss: 4.683182239532471


Processing epoch 00:  34%|███▍      | 1672/4850 [07:35<14:20,  3.69it/s]

Epoch: 1, Loss: 3.6280736923217773


Processing epoch 00:  34%|███▍      | 1673/4850 [07:36<14:20,  3.69it/s]

Epoch: 1, Loss: 4.045764923095703


Processing epoch 00:  35%|███▍      | 1674/4850 [07:36<14:21,  3.69it/s]

Epoch: 1, Loss: 4.248294830322266


Processing epoch 00:  35%|███▍      | 1675/4850 [07:36<14:18,  3.70it/s]

Epoch: 1, Loss: 3.580953598022461


Processing epoch 00:  35%|███▍      | 1676/4850 [07:37<14:16,  3.71it/s]

Epoch: 1, Loss: 4.193965435028076


Processing epoch 00:  35%|███▍      | 1677/4850 [07:37<14:17,  3.70it/s]

Epoch: 1, Loss: 4.017097473144531


Processing epoch 00:  35%|███▍      | 1678/4850 [07:37<14:14,  3.71it/s]

Epoch: 1, Loss: 4.621613502502441


Processing epoch 00:  35%|███▍      | 1679/4850 [07:37<14:28,  3.65it/s]

Epoch: 1, Loss: 4.299469470977783


Processing epoch 00:  35%|███▍      | 1680/4850 [07:38<14:48,  3.57it/s]

Epoch: 1, Loss: 5.626347541809082


Processing epoch 00:  35%|███▍      | 1681/4850 [07:38<14:58,  3.53it/s]

Epoch: 1, Loss: 4.052277565002441


Processing epoch 00:  35%|███▍      | 1682/4850 [07:38<14:58,  3.53it/s]

Epoch: 1, Loss: 3.8872485160827637


Processing epoch 00:  35%|███▍      | 1683/4850 [07:39<15:06,  3.49it/s]

Epoch: 1, Loss: 4.016353607177734


Processing epoch 00:  35%|███▍      | 1684/4850 [07:39<15:10,  3.48it/s]

Epoch: 1, Loss: 5.354681968688965


Processing epoch 00:  35%|███▍      | 1685/4850 [07:39<15:16,  3.45it/s]

Epoch: 1, Loss: 4.514698505401611


Processing epoch 00:  35%|███▍      | 1686/4850 [07:39<15:22,  3.43it/s]

Epoch: 1, Loss: 4.7861528396606445


Processing epoch 00:  35%|███▍      | 1687/4850 [07:40<15:22,  3.43it/s]

Epoch: 1, Loss: 4.015923500061035


Processing epoch 00:  35%|███▍      | 1688/4850 [07:40<15:30,  3.40it/s]

Epoch: 1, Loss: 3.511746406555176


Processing epoch 00:  35%|███▍      | 1689/4850 [07:40<15:06,  3.49it/s]

Epoch: 1, Loss: 4.2293500900268555


Processing epoch 00:  35%|███▍      | 1690/4850 [07:41<14:49,  3.55it/s]

Epoch: 1, Loss: 3.9405040740966797


Processing epoch 00:  35%|███▍      | 1691/4850 [07:41<14:49,  3.55it/s]

Epoch: 1, Loss: 4.086336135864258


Processing epoch 00:  35%|███▍      | 1692/4850 [07:41<14:41,  3.58it/s]

Epoch: 1, Loss: 3.2471585273742676


Processing epoch 00:  35%|███▍      | 1693/4850 [07:41<14:33,  3.62it/s]

Epoch: 1, Loss: 4.418217182159424


Processing epoch 00:  35%|███▍      | 1694/4850 [07:42<14:25,  3.64it/s]

Epoch: 1, Loss: 4.187844276428223


Processing epoch 00:  35%|███▍      | 1695/4850 [07:42<14:24,  3.65it/s]

Epoch: 1, Loss: 3.3603930473327637


Processing epoch 00:  35%|███▍      | 1696/4850 [07:42<14:18,  3.67it/s]

Epoch: 1, Loss: 4.813724517822266


Processing epoch 00:  35%|███▍      | 1697/4850 [07:42<14:16,  3.68it/s]

Epoch: 1, Loss: 4.795995712280273


Processing epoch 00:  35%|███▌      | 1698/4850 [07:43<14:17,  3.68it/s]

Epoch: 1, Loss: 3.8238630294799805


Processing epoch 00:  35%|███▌      | 1699/4850 [07:43<14:17,  3.67it/s]

Epoch: 1, Loss: 4.117240905761719


Processing epoch 00:  35%|███▌      | 1700/4850 [07:43<14:16,  3.68it/s]

Epoch: 1, Loss: 4.151014804840088


Processing epoch 00:  35%|███▌      | 1701/4850 [07:44<14:17,  3.67it/s]

Epoch: 1, Loss: 3.834428071975708


Processing epoch 00:  35%|███▌      | 1702/4850 [07:44<14:16,  3.68it/s]

Epoch: 1, Loss: 4.671700954437256


Processing epoch 00:  35%|███▌      | 1703/4850 [07:44<14:15,  3.68it/s]

Epoch: 1, Loss: 4.473162651062012


Processing epoch 00:  35%|███▌      | 1704/4850 [07:44<14:18,  3.67it/s]

Epoch: 1, Loss: 3.592586040496826


Processing epoch 00:  35%|███▌      | 1705/4850 [07:45<14:16,  3.67it/s]

Epoch: 1, Loss: 3.9442005157470703


Processing epoch 00:  35%|███▌      | 1706/4850 [07:45<14:23,  3.64it/s]

Epoch: 1, Loss: 3.9617862701416016


Processing epoch 00:  35%|███▌      | 1707/4850 [07:45<14:18,  3.66it/s]

Epoch: 1, Loss: 4.329133033752441


Processing epoch 00:  35%|███▌      | 1708/4850 [07:45<14:15,  3.67it/s]

Epoch: 1, Loss: 4.729455947875977


Processing epoch 00:  35%|███▌      | 1709/4850 [07:46<14:11,  3.69it/s]

Epoch: 1, Loss: 4.594625949859619


Processing epoch 00:  35%|███▌      | 1710/4850 [07:46<14:10,  3.69it/s]

Epoch: 1, Loss: 4.609586715698242


Processing epoch 00:  35%|███▌      | 1711/4850 [07:46<14:08,  3.70it/s]

Epoch: 1, Loss: 4.594765663146973


Processing epoch 00:  35%|███▌      | 1712/4850 [07:47<14:05,  3.71it/s]

Epoch: 1, Loss: 4.820703029632568


Processing epoch 00:  35%|███▌      | 1713/4850 [07:47<14:08,  3.70it/s]

Epoch: 1, Loss: 3.7279176712036133


Processing epoch 00:  35%|███▌      | 1714/4850 [07:47<14:05,  3.71it/s]

Epoch: 1, Loss: 4.507105827331543


Processing epoch 00:  35%|███▌      | 1715/4850 [07:47<14:08,  3.69it/s]

Epoch: 1, Loss: 4.1575727462768555


Processing epoch 00:  35%|███▌      | 1716/4850 [07:48<14:03,  3.72it/s]

Epoch: 1, Loss: 5.648249626159668


Processing epoch 00:  35%|███▌      | 1717/4850 [07:48<14:05,  3.71it/s]

Epoch: 1, Loss: 4.996405124664307


Processing epoch 00:  35%|███▌      | 1718/4850 [07:48<14:04,  3.71it/s]

Epoch: 1, Loss: 4.870617866516113


Processing epoch 00:  35%|███▌      | 1719/4850 [07:48<14:02,  3.72it/s]

Epoch: 1, Loss: 4.814913749694824


Processing epoch 00:  35%|███▌      | 1720/4850 [07:49<13:59,  3.73it/s]

Epoch: 1, Loss: 4.9093918800354


Processing epoch 00:  35%|███▌      | 1721/4850 [07:49<13:57,  3.73it/s]

Epoch: 1, Loss: 4.230710029602051


Processing epoch 00:  36%|███▌      | 1722/4850 [07:49<14:01,  3.72it/s]

Epoch: 1, Loss: 3.9118287563323975


Processing epoch 00:  36%|███▌      | 1723/4850 [07:50<14:05,  3.70it/s]

Epoch: 1, Loss: 3.8347251415252686


Processing epoch 00:  36%|███▌      | 1724/4850 [07:50<14:10,  3.68it/s]

Epoch: 1, Loss: 4.178395748138428


Processing epoch 00:  36%|███▌      | 1725/4850 [07:50<14:46,  3.53it/s]

Epoch: 1, Loss: 4.15505838394165


Processing epoch 00:  36%|███▌      | 1726/4850 [07:50<15:04,  3.46it/s]

Epoch: 1, Loss: 4.1136627197265625


Processing epoch 00:  36%|███▌      | 1727/4850 [07:51<15:10,  3.43it/s]

Epoch: 1, Loss: 4.222713470458984


Processing epoch 00:  36%|███▌      | 1728/4850 [07:51<15:03,  3.46it/s]

Epoch: 1, Loss: 4.917792320251465


Processing epoch 00:  36%|███▌      | 1729/4850 [07:51<14:59,  3.47it/s]

Epoch: 1, Loss: 4.542438507080078


Processing epoch 00:  36%|███▌      | 1730/4850 [07:52<15:03,  3.45it/s]

Epoch: 1, Loss: 4.120700836181641


Processing epoch 00:  36%|███▌      | 1731/4850 [07:52<14:59,  3.47it/s]

Epoch: 1, Loss: 5.04886531829834


Processing epoch 00:  36%|███▌      | 1732/4850 [07:52<15:03,  3.45it/s]

Epoch: 1, Loss: 4.078356742858887


Processing epoch 00:  36%|███▌      | 1733/4850 [07:52<14:56,  3.48it/s]

Epoch: 1, Loss: 4.867098808288574


Processing epoch 00:  36%|███▌      | 1734/4850 [07:53<14:54,  3.48it/s]

Epoch: 1, Loss: 5.312304973602295


Processing epoch 00:  36%|███▌      | 1735/4850 [07:53<14:41,  3.53it/s]

Epoch: 1, Loss: 4.150821685791016


Processing epoch 00:  36%|███▌      | 1736/4850 [07:53<14:29,  3.58it/s]

Epoch: 1, Loss: 5.168926239013672


Processing epoch 00:  36%|███▌      | 1737/4850 [07:54<14:20,  3.62it/s]

Epoch: 1, Loss: 5.272017002105713


Processing epoch 00:  36%|███▌      | 1738/4850 [07:54<14:14,  3.64it/s]

Epoch: 1, Loss: 4.362451553344727


Processing epoch 00:  36%|███▌      | 1739/4850 [07:54<14:11,  3.65it/s]

Epoch: 1, Loss: 3.4689807891845703


Processing epoch 00:  36%|███▌      | 1740/4850 [07:54<14:10,  3.66it/s]

Epoch: 1, Loss: 4.253190994262695


Processing epoch 00:  36%|███▌      | 1741/4850 [07:55<14:08,  3.67it/s]

Epoch: 1, Loss: 3.4621474742889404


Processing epoch 00:  36%|███▌      | 1742/4850 [07:55<14:02,  3.69it/s]

Epoch: 1, Loss: 4.3122735023498535


Processing epoch 00:  36%|███▌      | 1743/4850 [07:55<14:03,  3.68it/s]

Epoch: 1, Loss: 3.5704281330108643


Processing epoch 00:  36%|███▌      | 1744/4850 [07:55<14:02,  3.69it/s]

Epoch: 1, Loss: 4.777811050415039


Processing epoch 00:  36%|███▌      | 1745/4850 [07:56<13:58,  3.70it/s]

Epoch: 1, Loss: 4.9842071533203125


Processing epoch 00:  36%|███▌      | 1746/4850 [07:56<13:54,  3.72it/s]

Epoch: 1, Loss: 5.56026029586792


Processing epoch 00:  36%|███▌      | 1747/4850 [07:56<13:54,  3.72it/s]

Epoch: 1, Loss: 3.9395127296447754


Processing epoch 00:  36%|███▌      | 1748/4850 [07:57<13:55,  3.71it/s]

Epoch: 1, Loss: 3.3929033279418945


Processing epoch 00:  36%|███▌      | 1749/4850 [07:57<13:56,  3.71it/s]

Epoch: 1, Loss: 4.598565101623535


Processing epoch 00:  36%|███▌      | 1750/4850 [07:57<13:58,  3.70it/s]

Epoch: 1, Loss: 4.050166130065918


Processing epoch 00:  36%|███▌      | 1751/4850 [07:57<14:03,  3.67it/s]

Epoch: 1, Loss: 4.0556230545043945


Processing epoch 00:  36%|███▌      | 1752/4850 [07:58<14:04,  3.67it/s]

Epoch: 1, Loss: 3.796583414077759


Processing epoch 00:  36%|███▌      | 1753/4850 [07:58<14:01,  3.68it/s]

Epoch: 1, Loss: 4.696826934814453


Processing epoch 00:  36%|███▌      | 1754/4850 [07:58<13:56,  3.70it/s]

Epoch: 1, Loss: 5.408378601074219


Processing epoch 00:  36%|███▌      | 1755/4850 [07:58<14:01,  3.68it/s]

Epoch: 1, Loss: 4.409581184387207


Processing epoch 00:  36%|███▌      | 1756/4850 [07:59<13:59,  3.68it/s]

Epoch: 1, Loss: 4.897114276885986


Processing epoch 00:  36%|███▌      | 1757/4850 [07:59<13:59,  3.69it/s]

Epoch: 1, Loss: 4.635095596313477


Processing epoch 00:  36%|███▌      | 1758/4850 [07:59<13:59,  3.68it/s]

Epoch: 1, Loss: 3.450512409210205


Processing epoch 00:  36%|███▋      | 1759/4850 [07:59<13:58,  3.68it/s]

Epoch: 1, Loss: 3.583305835723877


Processing epoch 00:  36%|███▋      | 1760/4850 [08:00<14:00,  3.68it/s]

Epoch: 1, Loss: 4.783672332763672


Processing epoch 00:  36%|███▋      | 1761/4850 [08:00<14:00,  3.68it/s]

Epoch: 1, Loss: 4.487349510192871


Processing epoch 00:  36%|███▋      | 1762/4850 [08:00<13:59,  3.68it/s]

Epoch: 1, Loss: 4.192426681518555


Processing epoch 00:  36%|███▋      | 1763/4850 [08:01<13:59,  3.68it/s]

Epoch: 1, Loss: 3.7642173767089844


Processing epoch 00:  36%|███▋      | 1764/4850 [08:01<13:58,  3.68it/s]

Epoch: 1, Loss: 3.8236074447631836


Processing epoch 00:  36%|███▋      | 1765/4850 [08:01<13:54,  3.70it/s]

Epoch: 1, Loss: 3.7596683502197266


Processing epoch 00:  36%|███▋      | 1766/4850 [08:01<13:51,  3.71it/s]

Epoch: 1, Loss: 5.342276573181152


Processing epoch 00:  36%|███▋      | 1767/4850 [08:02<13:50,  3.71it/s]

Epoch: 1, Loss: 4.779566287994385


Processing epoch 00:  36%|███▋      | 1768/4850 [08:02<13:50,  3.71it/s]

Epoch: 1, Loss: 5.089054584503174


Processing epoch 00:  36%|███▋      | 1769/4850 [08:02<13:48,  3.72it/s]

Epoch: 1, Loss: 4.017233848571777


Processing epoch 00:  36%|███▋      | 1770/4850 [08:02<13:53,  3.69it/s]

Epoch: 1, Loss: 4.94997501373291


Processing epoch 00:  37%|███▋      | 1771/4850 [08:03<13:54,  3.69it/s]

Epoch: 1, Loss: 4.781801223754883


Processing epoch 00:  37%|███▋      | 1772/4850 [08:03<14:12,  3.61it/s]

Epoch: 1, Loss: 3.8734378814697266


Processing epoch 00:  37%|███▋      | 1773/4850 [08:03<14:21,  3.57it/s]

Epoch: 1, Loss: 3.9601855278015137


Processing epoch 00:  37%|███▋      | 1774/4850 [08:04<14:37,  3.51it/s]

Epoch: 1, Loss: 4.030612945556641


Processing epoch 00:  37%|███▋      | 1775/4850 [08:04<14:38,  3.50it/s]

Epoch: 1, Loss: 4.34097957611084


Processing epoch 00:  37%|███▋      | 1776/4850 [08:04<14:45,  3.47it/s]

Epoch: 1, Loss: 4.072725296020508


Processing epoch 00:  37%|███▋      | 1777/4850 [08:04<14:51,  3.45it/s]

Epoch: 1, Loss: 5.470940589904785


Processing epoch 00:  37%|███▋      | 1778/4850 [08:05<14:44,  3.47it/s]

Epoch: 1, Loss: 4.20742654800415


Processing epoch 00:  37%|███▋      | 1779/4850 [08:05<14:35,  3.51it/s]

Epoch: 1, Loss: 5.2686448097229


Processing epoch 00:  37%|███▋      | 1780/4850 [08:05<14:28,  3.54it/s]

Epoch: 1, Loss: 4.965350151062012


Processing epoch 00:  37%|███▋      | 1781/4850 [08:06<14:42,  3.48it/s]

Epoch: 1, Loss: 4.130824089050293


Processing epoch 00:  37%|███▋      | 1782/4850 [08:06<14:48,  3.45it/s]

Epoch: 1, Loss: 4.655235767364502


Processing epoch 00:  37%|███▋      | 1783/4850 [08:06<14:26,  3.54it/s]

Epoch: 1, Loss: 4.073516845703125


Processing epoch 00:  37%|███▋      | 1784/4850 [08:06<14:14,  3.59it/s]

Epoch: 1, Loss: 3.993988037109375


Processing epoch 00:  37%|███▋      | 1785/4850 [08:07<14:05,  3.62it/s]

Epoch: 1, Loss: 4.979435920715332


Processing epoch 00:  37%|███▋      | 1786/4850 [08:07<13:59,  3.65it/s]

Epoch: 1, Loss: 3.240494966506958


Processing epoch 00:  37%|███▋      | 1787/4850 [08:07<13:58,  3.65it/s]

Epoch: 1, Loss: 4.130837440490723


Processing epoch 00:  37%|███▋      | 1788/4850 [08:08<13:58,  3.65it/s]

Epoch: 1, Loss: 4.39013671875


Processing epoch 00:  37%|███▋      | 1789/4850 [08:08<13:56,  3.66it/s]

Epoch: 1, Loss: 4.870562553405762


Processing epoch 00:  37%|███▋      | 1790/4850 [08:08<13:54,  3.67it/s]

Epoch: 1, Loss: 4.995309352874756


Processing epoch 00:  37%|███▋      | 1791/4850 [08:08<13:48,  3.69it/s]

Epoch: 1, Loss: 5.213404655456543


Processing epoch 00:  37%|███▋      | 1792/4850 [08:09<13:48,  3.69it/s]

Epoch: 1, Loss: 3.7532401084899902


Processing epoch 00:  37%|███▋      | 1793/4850 [08:09<13:50,  3.68it/s]

Epoch: 1, Loss: 3.419802188873291


Processing epoch 00:  37%|███▋      | 1794/4850 [08:09<13:45,  3.70it/s]

Epoch: 1, Loss: 4.419531345367432


Processing epoch 00:  37%|███▋      | 1795/4850 [08:09<13:44,  3.70it/s]

Epoch: 1, Loss: 5.2002105712890625


Processing epoch 00:  37%|███▋      | 1796/4850 [08:10<13:52,  3.67it/s]

Epoch: 1, Loss: 4.2859063148498535


Processing epoch 00:  37%|███▋      | 1797/4850 [08:10<13:51,  3.67it/s]

Epoch: 1, Loss: 3.1428818702697754


Processing epoch 00:  37%|███▋      | 1798/4850 [08:10<13:49,  3.68it/s]

Epoch: 1, Loss: 4.796553611755371


Processing epoch 00:  37%|███▋      | 1799/4850 [08:11<13:47,  3.69it/s]

Epoch: 1, Loss: 4.329851150512695


Processing epoch 00:  37%|███▋      | 1800/4850 [08:11<13:47,  3.69it/s]

Epoch: 1, Loss: 3.7420268058776855


Processing epoch 00:  37%|███▋      | 1801/4850 [08:11<13:46,  3.69it/s]

Epoch: 1, Loss: 3.818027973175049


Processing epoch 00:  37%|███▋      | 1802/4850 [08:11<13:46,  3.69it/s]

Epoch: 1, Loss: 3.7438101768493652


Processing epoch 00:  37%|███▋      | 1803/4850 [08:12<13:45,  3.69it/s]

Epoch: 1, Loss: 4.80037784576416


Processing epoch 00:  37%|███▋      | 1804/4850 [08:12<13:43,  3.70it/s]

Epoch: 1, Loss: 5.014529705047607


Processing epoch 00:  37%|███▋      | 1805/4850 [08:12<13:42,  3.70it/s]

Epoch: 1, Loss: 4.178608417510986


Processing epoch 00:  37%|███▋      | 1806/4850 [08:12<13:44,  3.69it/s]

Epoch: 1, Loss: 3.7120211124420166


Processing epoch 00:  37%|███▋      | 1807/4850 [08:13<13:43,  3.69it/s]

Epoch: 1, Loss: 4.465561389923096


Processing epoch 00:  37%|███▋      | 1808/4850 [08:13<13:46,  3.68it/s]

Epoch: 1, Loss: 4.232935428619385


Processing epoch 00:  37%|███▋      | 1809/4850 [08:13<13:44,  3.69it/s]

Epoch: 1, Loss: 5.122432708740234


Processing epoch 00:  37%|███▋      | 1810/4850 [08:13<13:39,  3.71it/s]

Epoch: 1, Loss: 5.377255916595459


Processing epoch 00:  37%|███▋      | 1811/4850 [08:14<13:39,  3.71it/s]

Epoch: 1, Loss: 3.3137757778167725


Processing epoch 00:  37%|███▋      | 1812/4850 [08:14<13:40,  3.70it/s]

Epoch: 1, Loss: 4.233806133270264


Processing epoch 00:  37%|███▋      | 1813/4850 [08:14<13:39,  3.71it/s]

Epoch: 1, Loss: 4.690410614013672


Processing epoch 00:  37%|███▋      | 1814/4850 [08:15<13:43,  3.69it/s]

Epoch: 1, Loss: 3.492995262145996


Processing epoch 00:  37%|███▋      | 1815/4850 [08:15<13:53,  3.64it/s]

Epoch: 1, Loss: 4.189726829528809


Processing epoch 00:  37%|███▋      | 1816/4850 [08:15<13:53,  3.64it/s]

Epoch: 1, Loss: 4.682579040527344


Processing epoch 00:  37%|███▋      | 1817/4850 [08:15<13:50,  3.65it/s]

Epoch: 1, Loss: 4.464663982391357


Processing epoch 00:  37%|███▋      | 1818/4850 [08:16<13:48,  3.66it/s]

Epoch: 1, Loss: 4.602126121520996


Processing epoch 00:  38%|███▊      | 1819/4850 [08:16<13:56,  3.62it/s]

Epoch: 1, Loss: 4.306842803955078


Processing epoch 00:  38%|███▊      | 1820/4850 [08:16<14:24,  3.50it/s]

Epoch: 1, Loss: 3.888723611831665


Processing epoch 00:  38%|███▊      | 1821/4850 [08:17<14:25,  3.50it/s]

Epoch: 1, Loss: 3.641119956970215


Processing epoch 00:  38%|███▊      | 1822/4850 [08:17<14:27,  3.49it/s]

Epoch: 1, Loss: 3.3695716857910156


Processing epoch 00:  38%|███▊      | 1823/4850 [08:17<14:23,  3.51it/s]

Epoch: 1, Loss: 3.975149154663086


Processing epoch 00:  38%|███▊      | 1824/4850 [08:17<14:26,  3.49it/s]

Epoch: 1, Loss: 4.241983890533447


Processing epoch 00:  38%|███▊      | 1825/4850 [08:18<14:27,  3.49it/s]

Epoch: 1, Loss: 3.9061672687530518


Processing epoch 00:  38%|███▊      | 1826/4850 [08:18<14:40,  3.43it/s]

Epoch: 1, Loss: 4.429435729980469


Processing epoch 00:  38%|███▊      | 1827/4850 [08:18<14:40,  3.43it/s]

Epoch: 1, Loss: 3.403092861175537


Processing epoch 00:  38%|███▊      | 1828/4850 [08:19<14:47,  3.40it/s]

Epoch: 1, Loss: 4.4610419273376465


Processing epoch 00:  38%|███▊      | 1829/4850 [08:19<14:22,  3.50it/s]

Epoch: 1, Loss: 5.577341079711914


Processing epoch 00:  38%|███▊      | 1830/4850 [08:19<14:05,  3.57it/s]

Epoch: 1, Loss: 4.692310333251953


Processing epoch 00:  38%|███▊      | 1831/4850 [08:19<13:58,  3.60it/s]

Epoch: 1, Loss: 3.2808890342712402


Processing epoch 00:  38%|███▊      | 1832/4850 [08:20<13:52,  3.62it/s]

Epoch: 1, Loss: 4.218374252319336


Processing epoch 00:  38%|███▊      | 1833/4850 [08:20<13:52,  3.62it/s]

Epoch: 1, Loss: 4.434330940246582


Processing epoch 00:  38%|███▊      | 1834/4850 [08:20<13:49,  3.64it/s]

Epoch: 1, Loss: 4.822682857513428


Processing epoch 00:  38%|███▊      | 1835/4850 [08:20<13:41,  3.67it/s]

Epoch: 1, Loss: 5.280360698699951


Processing epoch 00:  38%|███▊      | 1836/4850 [08:21<13:40,  3.67it/s]

Epoch: 1, Loss: 3.5662741661071777


Processing epoch 00:  38%|███▊      | 1837/4850 [08:21<13:45,  3.65it/s]

Epoch: 1, Loss: 4.548099040985107


Processing epoch 00:  38%|███▊      | 1838/4850 [08:21<13:41,  3.66it/s]

Epoch: 1, Loss: 4.192410469055176


Processing epoch 00:  38%|███▊      | 1839/4850 [08:22<13:39,  3.68it/s]

Epoch: 1, Loss: 3.6581125259399414


Processing epoch 00:  38%|███▊      | 1840/4850 [08:22<13:39,  3.67it/s]

Epoch: 1, Loss: 3.957371950149536


Processing epoch 00:  38%|███▊      | 1841/4850 [08:22<13:40,  3.67it/s]

Epoch: 1, Loss: 3.764697313308716


Processing epoch 00:  38%|███▊      | 1842/4850 [08:22<13:34,  3.69it/s]

Epoch: 1, Loss: 5.350702285766602


Processing epoch 00:  38%|███▊      | 1843/4850 [08:23<13:36,  3.68it/s]

Epoch: 1, Loss: 3.766045570373535


Processing epoch 00:  38%|███▊      | 1844/4850 [08:23<13:37,  3.68it/s]

Epoch: 1, Loss: 3.881718635559082


Processing epoch 00:  38%|███▊      | 1845/4850 [08:23<13:34,  3.69it/s]

Epoch: 1, Loss: 5.089679718017578


Processing epoch 00:  38%|███▊      | 1846/4850 [08:23<13:35,  3.68it/s]

Epoch: 1, Loss: 4.1808624267578125


Processing epoch 00:  38%|███▊      | 1847/4850 [08:24<13:34,  3.69it/s]

Epoch: 1, Loss: 4.328311920166016


Processing epoch 00:  38%|███▊      | 1848/4850 [08:24<13:33,  3.69it/s]

Epoch: 1, Loss: 2.9624462127685547


Processing epoch 00:  38%|███▊      | 1849/4850 [08:24<13:36,  3.67it/s]

Epoch: 1, Loss: 5.630712509155273


Processing epoch 00:  38%|███▊      | 1850/4850 [08:25<13:39,  3.66it/s]

Epoch: 1, Loss: 3.9386844635009766


Processing epoch 00:  38%|███▊      | 1851/4850 [08:25<13:39,  3.66it/s]

Epoch: 1, Loss: 3.993751287460327


Processing epoch 00:  38%|███▊      | 1852/4850 [08:25<13:45,  3.63it/s]

Epoch: 1, Loss: 4.710115909576416


Processing epoch 00:  38%|███▊      | 1853/4850 [08:25<13:40,  3.65it/s]

Epoch: 1, Loss: 4.387824535369873


Processing epoch 00:  38%|███▊      | 1854/4850 [08:26<13:39,  3.66it/s]

Epoch: 1, Loss: 3.821141242980957


Processing epoch 00:  38%|███▊      | 1855/4850 [08:26<13:38,  3.66it/s]

Epoch: 1, Loss: 3.7099509239196777


Processing epoch 00:  38%|███▊      | 1856/4850 [08:26<13:33,  3.68it/s]

Epoch: 1, Loss: 4.658202171325684


Processing epoch 00:  38%|███▊      | 1857/4850 [08:26<13:36,  3.67it/s]

Epoch: 1, Loss: 4.096293926239014


Processing epoch 00:  38%|███▊      | 1858/4850 [08:27<13:33,  3.68it/s]

Epoch: 1, Loss: 4.801258087158203


Processing epoch 00:  38%|███▊      | 1859/4850 [08:27<13:32,  3.68it/s]

Epoch: 1, Loss: 4.999537467956543


Processing epoch 00:  38%|███▊      | 1860/4850 [08:27<13:30,  3.69it/s]

Epoch: 1, Loss: 4.862466812133789


Processing epoch 00:  38%|███▊      | 1861/4850 [08:28<13:28,  3.70it/s]

Epoch: 1, Loss: 4.898595809936523


Processing epoch 00:  38%|███▊      | 1862/4850 [08:28<13:31,  3.68it/s]

Epoch: 1, Loss: 4.666739463806152


Processing epoch 00:  38%|███▊      | 1863/4850 [08:28<13:33,  3.67it/s]

Epoch: 1, Loss: 5.43905782699585


Processing epoch 00:  38%|███▊      | 1864/4850 [08:28<13:32,  3.67it/s]

Epoch: 1, Loss: 3.70639967918396


Processing epoch 00:  38%|███▊      | 1865/4850 [08:29<13:35,  3.66it/s]

Epoch: 1, Loss: 3.757028579711914


Processing epoch 00:  38%|███▊      | 1866/4850 [08:29<13:56,  3.57it/s]

Epoch: 1, Loss: 5.5618696212768555


Processing epoch 00:  38%|███▊      | 1867/4850 [08:29<14:03,  3.54it/s]

Epoch: 1, Loss: 3.870120048522949


Processing epoch 00:  39%|███▊      | 1868/4850 [08:30<14:05,  3.53it/s]

Epoch: 1, Loss: 3.9827804565429688


Processing epoch 00:  39%|███▊      | 1869/4850 [08:30<14:13,  3.49it/s]

Epoch: 1, Loss: 4.960044860839844


Processing epoch 00:  39%|███▊      | 1870/4850 [08:30<14:13,  3.49it/s]

Epoch: 1, Loss: 4.541006088256836


Processing epoch 00:  39%|███▊      | 1871/4850 [08:30<14:34,  3.41it/s]

Epoch: 1, Loss: 3.8342156410217285


Processing epoch 00:  39%|███▊      | 1872/4850 [08:31<14:29,  3.42it/s]

Epoch: 1, Loss: 4.024988651275635


Processing epoch 00:  39%|███▊      | 1873/4850 [08:31<14:31,  3.42it/s]

Epoch: 1, Loss: 4.573190689086914


Processing epoch 00:  39%|███▊      | 1874/4850 [08:31<14:49,  3.34it/s]

Epoch: 1, Loss: 4.852818965911865


Processing epoch 00:  39%|███▊      | 1875/4850 [08:32<14:24,  3.44it/s]

Epoch: 1, Loss: 4.614226818084717


Processing epoch 00:  39%|███▊      | 1876/4850 [08:32<14:06,  3.51it/s]

Epoch: 1, Loss: 4.351261138916016


Processing epoch 00:  39%|███▊      | 1877/4850 [08:32<13:55,  3.56it/s]

Epoch: 1, Loss: 4.693734169006348


Processing epoch 00:  39%|███▊      | 1878/4850 [08:32<13:54,  3.56it/s]

Epoch: 1, Loss: 3.805830955505371


Processing epoch 00:  39%|███▊      | 1879/4850 [08:33<13:44,  3.60it/s]

Epoch: 1, Loss: 4.453473091125488


Processing epoch 00:  39%|███▉      | 1880/4850 [08:33<13:39,  3.62it/s]

Epoch: 1, Loss: 4.125462532043457


Processing epoch 00:  39%|███▉      | 1881/4850 [08:33<13:34,  3.64it/s]

Epoch: 1, Loss: 3.794813632965088


Processing epoch 00:  39%|███▉      | 1882/4850 [08:33<13:29,  3.67it/s]

Epoch: 1, Loss: 4.674055099487305


Processing epoch 00:  39%|███▉      | 1883/4850 [08:34<13:24,  3.69it/s]

Epoch: 1, Loss: 4.790373802185059


Processing epoch 00:  39%|███▉      | 1884/4850 [08:34<13:25,  3.68it/s]

Epoch: 1, Loss: 4.764703750610352


Processing epoch 00:  39%|███▉      | 1885/4850 [08:34<13:23,  3.69it/s]

Epoch: 1, Loss: 5.005546569824219


Processing epoch 00:  39%|███▉      | 1886/4850 [08:35<13:20,  3.70it/s]

Epoch: 1, Loss: 3.901956558227539


Processing epoch 00:  39%|███▉      | 1887/4850 [08:35<13:20,  3.70it/s]

Epoch: 1, Loss: 3.6640138626098633


Processing epoch 00:  39%|███▉      | 1888/4850 [08:35<13:23,  3.69it/s]

Epoch: 1, Loss: 4.300425052642822


Processing epoch 00:  39%|███▉      | 1889/4850 [08:35<13:24,  3.68it/s]

Epoch: 1, Loss: 4.4579596519470215


Processing epoch 00:  39%|███▉      | 1890/4850 [08:36<13:22,  3.69it/s]

Epoch: 1, Loss: 4.365722179412842


Processing epoch 00:  39%|███▉      | 1891/4850 [08:36<13:22,  3.69it/s]

Epoch: 1, Loss: 4.219042778015137


Processing epoch 00:  39%|███▉      | 1892/4850 [08:36<13:21,  3.69it/s]

Epoch: 1, Loss: 4.499779224395752


Processing epoch 00:  39%|███▉      | 1893/4850 [08:36<13:25,  3.67it/s]

Epoch: 1, Loss: 3.998366355895996


Processing epoch 00:  39%|███▉      | 1894/4850 [08:37<13:27,  3.66it/s]

Epoch: 1, Loss: 3.3466312885284424


Processing epoch 00:  39%|███▉      | 1895/4850 [08:37<13:26,  3.67it/s]

Epoch: 1, Loss: 3.5023045539855957


Processing epoch 00:  39%|███▉      | 1896/4850 [08:37<13:25,  3.67it/s]

Epoch: 1, Loss: 4.120253562927246


Processing epoch 00:  39%|███▉      | 1897/4850 [08:38<13:23,  3.68it/s]

Epoch: 1, Loss: 4.0997514724731445


Processing epoch 00:  39%|███▉      | 1898/4850 [08:38<13:25,  3.66it/s]

Epoch: 1, Loss: 4.190491199493408


Processing epoch 00:  39%|███▉      | 1899/4850 [08:38<13:22,  3.68it/s]

Epoch: 1, Loss: 3.7129902839660645


Processing epoch 00:  39%|███▉      | 1900/4850 [08:38<13:25,  3.66it/s]

Epoch: 1, Loss: 4.087653160095215


Processing epoch 00:  39%|███▉      | 1901/4850 [08:39<13:23,  3.67it/s]

Epoch: 1, Loss: 3.520779609680176


Processing epoch 00:  39%|███▉      | 1902/4850 [08:39<13:22,  3.67it/s]

Epoch: 1, Loss: 4.236660480499268


Processing epoch 00:  39%|███▉      | 1903/4850 [08:39<13:23,  3.67it/s]

Epoch: 1, Loss: 4.023908615112305


Processing epoch 00:  39%|███▉      | 1904/4850 [08:39<13:21,  3.68it/s]

Epoch: 1, Loss: 4.921131610870361


Processing epoch 00:  39%|███▉      | 1905/4850 [08:40<13:22,  3.67it/s]

Epoch: 1, Loss: 4.067474365234375


Processing epoch 00:  39%|███▉      | 1906/4850 [08:40<13:22,  3.67it/s]

Epoch: 1, Loss: 4.226128578186035


Processing epoch 00:  39%|███▉      | 1907/4850 [08:40<13:20,  3.67it/s]

Epoch: 1, Loss: 3.6001291275024414


Processing epoch 00:  39%|███▉      | 1908/4850 [08:41<13:19,  3.68it/s]

Epoch: 1, Loss: 4.852029323577881


Processing epoch 00:  39%|███▉      | 1909/4850 [08:41<13:19,  3.68it/s]

Epoch: 1, Loss: 4.00691032409668


Processing epoch 00:  39%|███▉      | 1910/4850 [08:41<13:18,  3.68it/s]

Epoch: 1, Loss: 4.668792724609375


Processing epoch 00:  39%|███▉      | 1911/4850 [08:41<13:16,  3.69it/s]

Epoch: 1, Loss: 5.019148826599121


Processing epoch 00:  39%|███▉      | 1912/4850 [08:42<13:37,  3.59it/s]

Epoch: 1, Loss: 3.9914207458496094


Processing epoch 00:  39%|███▉      | 1913/4850 [08:42<13:58,  3.50it/s]

Epoch: 1, Loss: 3.543550729751587


Processing epoch 00:  39%|███▉      | 1914/4850 [08:42<14:09,  3.45it/s]

Epoch: 1, Loss: 3.535862922668457


Processing epoch 00:  39%|███▉      | 1915/4850 [08:43<14:10,  3.45it/s]

Epoch: 1, Loss: 3.9152235984802246


Processing epoch 00:  40%|███▉      | 1916/4850 [08:43<14:17,  3.42it/s]

Epoch: 1, Loss: 5.114533424377441


Processing epoch 00:  40%|███▉      | 1917/4850 [08:43<14:21,  3.40it/s]

Epoch: 1, Loss: 3.932605743408203


Processing epoch 00:  40%|███▉      | 1918/4850 [08:43<14:16,  3.42it/s]

Epoch: 1, Loss: 3.2352280616760254


Processing epoch 00:  40%|███▉      | 1919/4850 [08:44<14:22,  3.40it/s]

Epoch: 1, Loss: 3.6639609336853027


Processing epoch 00:  40%|███▉      | 1920/4850 [08:44<14:25,  3.39it/s]

Epoch: 1, Loss: 4.163573265075684


Processing epoch 00:  40%|███▉      | 1921/4850 [08:44<14:07,  3.45it/s]

Epoch: 1, Loss: 3.2888689041137695


Processing epoch 00:  40%|███▉      | 1922/4850 [08:45<13:51,  3.52it/s]

Epoch: 1, Loss: 4.555445194244385


Processing epoch 00:  40%|███▉      | 1923/4850 [08:45<13:42,  3.56it/s]

Epoch: 1, Loss: 3.304206371307373


Processing epoch 00:  40%|███▉      | 1924/4850 [08:45<13:34,  3.59it/s]

Epoch: 1, Loss: 3.463378667831421


Processing epoch 00:  40%|███▉      | 1925/4850 [08:45<13:32,  3.60it/s]

Epoch: 1, Loss: 3.8639273643493652


Processing epoch 00:  40%|███▉      | 1926/4850 [08:46<13:27,  3.62it/s]

Epoch: 1, Loss: 4.393579959869385


Processing epoch 00:  40%|███▉      | 1927/4850 [08:46<13:23,  3.64it/s]

Epoch: 1, Loss: 4.540131092071533


Processing epoch 00:  40%|███▉      | 1928/4850 [08:46<13:16,  3.67it/s]

Epoch: 1, Loss: 4.170708179473877


Processing epoch 00:  40%|███▉      | 1929/4850 [08:46<13:16,  3.67it/s]

Epoch: 1, Loss: 4.479738712310791


Processing epoch 00:  40%|███▉      | 1930/4850 [08:47<13:11,  3.69it/s]

Epoch: 1, Loss: 4.779285430908203


Processing epoch 00:  40%|███▉      | 1931/4850 [08:47<13:13,  3.68it/s]

Epoch: 1, Loss: 4.315132141113281


Processing epoch 00:  40%|███▉      | 1932/4850 [08:47<13:10,  3.69it/s]

Epoch: 1, Loss: 4.462274551391602


Processing epoch 00:  40%|███▉      | 1933/4850 [08:48<13:04,  3.72it/s]

Epoch: 1, Loss: 5.843989372253418


Processing epoch 00:  40%|███▉      | 1934/4850 [08:48<13:09,  3.70it/s]

Epoch: 1, Loss: 5.351254463195801


Processing epoch 00:  40%|███▉      | 1935/4850 [08:48<13:11,  3.68it/s]

Epoch: 1, Loss: 3.584216833114624


Processing epoch 00:  40%|███▉      | 1936/4850 [08:48<13:10,  3.68it/s]

Epoch: 1, Loss: 4.577667713165283


Processing epoch 00:  40%|███▉      | 1937/4850 [08:49<13:08,  3.70it/s]

Epoch: 1, Loss: 3.959953546524048


Processing epoch 00:  40%|███▉      | 1938/4850 [08:49<13:05,  3.71it/s]

Epoch: 1, Loss: 4.112432956695557


Processing epoch 00:  40%|███▉      | 1939/4850 [08:49<13:05,  3.71it/s]

Epoch: 1, Loss: 4.321298599243164


Processing epoch 00:  40%|████      | 1940/4850 [08:49<13:05,  3.70it/s]

Epoch: 1, Loss: 3.2191803455352783


Processing epoch 00:  40%|████      | 1941/4850 [08:50<13:08,  3.69it/s]

Epoch: 1, Loss: 3.802133560180664


Processing epoch 00:  40%|████      | 1942/4850 [08:50<13:06,  3.70it/s]

Epoch: 1, Loss: 4.157742977142334


Processing epoch 00:  40%|████      | 1943/4850 [08:50<13:07,  3.69it/s]

Epoch: 1, Loss: 4.3509416580200195


Processing epoch 00:  40%|████      | 1944/4850 [08:51<13:14,  3.66it/s]

Epoch: 1, Loss: 3.713409662246704


Processing epoch 00:  40%|████      | 1945/4850 [08:51<13:10,  3.67it/s]

Epoch: 1, Loss: 5.0067667961120605


Processing epoch 00:  40%|████      | 1946/4850 [08:51<13:07,  3.69it/s]

Epoch: 1, Loss: 4.210378646850586


Processing epoch 00:  40%|████      | 1947/4850 [08:51<13:07,  3.69it/s]

Epoch: 1, Loss: 4.943889141082764


Processing epoch 00:  40%|████      | 1948/4850 [08:52<13:10,  3.67it/s]

Epoch: 1, Loss: 3.9135336875915527


Processing epoch 00:  40%|████      | 1949/4850 [08:52<13:14,  3.65it/s]

Epoch: 1, Loss: 5.018604278564453


Processing epoch 00:  40%|████      | 1950/4850 [08:52<13:13,  3.66it/s]

Epoch: 1, Loss: 4.029106140136719


Processing epoch 00:  40%|████      | 1951/4850 [08:52<13:09,  3.67it/s]

Epoch: 1, Loss: 4.311146259307861


Processing epoch 00:  40%|████      | 1952/4850 [08:53<13:10,  3.66it/s]

Epoch: 1, Loss: 4.332786560058594


Processing epoch 00:  40%|████      | 1953/4850 [08:53<13:10,  3.66it/s]

Epoch: 1, Loss: 3.454030990600586


Processing epoch 00:  40%|████      | 1954/4850 [08:53<13:11,  3.66it/s]

Epoch: 1, Loss: 4.252179145812988


Processing epoch 00:  40%|████      | 1955/4850 [08:54<13:09,  3.67it/s]

Epoch: 1, Loss: 3.7000036239624023


Processing epoch 00:  40%|████      | 1956/4850 [08:54<13:11,  3.66it/s]

Epoch: 1, Loss: 4.247178554534912


Processing epoch 00:  40%|████      | 1957/4850 [08:54<13:08,  3.67it/s]

Epoch: 1, Loss: 3.7851362228393555


Processing epoch 00:  40%|████      | 1958/4850 [08:54<13:27,  3.58it/s]

Epoch: 1, Loss: 3.431462049484253


Processing epoch 00:  40%|████      | 1959/4850 [08:55<13:34,  3.55it/s]

Epoch: 1, Loss: 3.7314743995666504


Processing epoch 00:  40%|████      | 1960/4850 [08:55<13:43,  3.51it/s]

Epoch: 1, Loss: 4.390267372131348


Processing epoch 00:  40%|████      | 1961/4850 [08:55<13:47,  3.49it/s]

Epoch: 1, Loss: 3.915956974029541


Processing epoch 00:  40%|████      | 1962/4850 [08:56<13:44,  3.50it/s]

Epoch: 1, Loss: 4.468263626098633


Processing epoch 00:  40%|████      | 1963/4850 [08:56<13:41,  3.51it/s]

Epoch: 1, Loss: 3.9489150047302246


Processing epoch 00:  40%|████      | 1964/4850 [08:56<13:58,  3.44it/s]

Epoch: 1, Loss: 3.942148208618164


Processing epoch 00:  41%|████      | 1965/4850 [08:56<14:10,  3.39it/s]

Epoch: 1, Loss: 4.0656938552856445


Processing epoch 00:  41%|████      | 1966/4850 [08:57<14:20,  3.35it/s]

Epoch: 1, Loss: 3.677675724029541


Processing epoch 00:  41%|████      | 1967/4850 [08:57<14:19,  3.35it/s]

Epoch: 1, Loss: 5.092687606811523


Processing epoch 00:  41%|████      | 1968/4850 [08:57<13:55,  3.45it/s]

Epoch: 1, Loss: 4.311554431915283


Processing epoch 00:  41%|████      | 1969/4850 [08:58<13:37,  3.52it/s]

Epoch: 1, Loss: 4.251834869384766


Processing epoch 00:  41%|████      | 1970/4850 [08:58<13:26,  3.57it/s]

Epoch: 1, Loss: 4.589654922485352


Processing epoch 00:  41%|████      | 1971/4850 [08:58<13:26,  3.57it/s]

Epoch: 1, Loss: 5.130415439605713


Processing epoch 00:  41%|████      | 1972/4850 [08:58<13:18,  3.61it/s]

Epoch: 1, Loss: 3.428767204284668


Processing epoch 00:  41%|████      | 1973/4850 [08:59<13:12,  3.63it/s]

Epoch: 1, Loss: 3.652801752090454


Processing epoch 00:  41%|████      | 1974/4850 [08:59<13:07,  3.65it/s]

Epoch: 1, Loss: 4.400877475738525


Processing epoch 00:  41%|████      | 1975/4850 [08:59<13:01,  3.68it/s]

Epoch: 1, Loss: 4.890679836273193


Processing epoch 00:  41%|████      | 1976/4850 [08:59<13:00,  3.68it/s]

Epoch: 1, Loss: 3.8227806091308594


Processing epoch 00:  41%|████      | 1977/4850 [09:00<13:01,  3.68it/s]

Epoch: 1, Loss: 3.275820255279541


Processing epoch 00:  41%|████      | 1978/4850 [09:00<13:01,  3.68it/s]

Epoch: 1, Loss: 4.708239555358887


Processing epoch 00:  41%|████      | 1979/4850 [09:00<13:01,  3.67it/s]

Epoch: 1, Loss: 3.6280782222747803


Processing epoch 00:  41%|████      | 1980/4850 [09:01<13:00,  3.68it/s]

Epoch: 1, Loss: 3.903054714202881


Processing epoch 00:  41%|████      | 1981/4850 [09:01<12:59,  3.68it/s]

Epoch: 1, Loss: 4.282252311706543


Processing epoch 00:  41%|████      | 1982/4850 [09:01<12:58,  3.68it/s]

Epoch: 1, Loss: 4.301289081573486


Processing epoch 00:  41%|████      | 1983/4850 [09:01<12:57,  3.69it/s]

Epoch: 1, Loss: 4.4112443923950195


Processing epoch 00:  41%|████      | 1984/4850 [09:02<12:56,  3.69it/s]

Epoch: 1, Loss: 4.107202529907227


Processing epoch 00:  41%|████      | 1985/4850 [09:02<12:58,  3.68it/s]

Epoch: 1, Loss: 3.791350841522217


Processing epoch 00:  41%|████      | 1986/4850 [09:02<13:04,  3.65it/s]

Epoch: 1, Loss: 3.9577574729919434


Processing epoch 00:  41%|████      | 1987/4850 [09:02<13:00,  3.67it/s]

Epoch: 1, Loss: 3.894857406616211


Processing epoch 00:  41%|████      | 1988/4850 [09:03<12:57,  3.68it/s]

Epoch: 1, Loss: 3.9838995933532715


Processing epoch 00:  41%|████      | 1989/4850 [09:03<12:56,  3.68it/s]

Epoch: 1, Loss: 4.076045036315918


Processing epoch 00:  41%|████      | 1990/4850 [09:03<12:54,  3.69it/s]

Epoch: 1, Loss: 4.473041534423828


Processing epoch 00:  41%|████      | 1991/4850 [09:04<12:57,  3.68it/s]

Epoch: 1, Loss: 4.496439456939697


Processing epoch 00:  41%|████      | 1992/4850 [09:04<12:59,  3.67it/s]

Epoch: 1, Loss: 4.484683036804199


Processing epoch 00:  41%|████      | 1993/4850 [09:04<13:00,  3.66it/s]

Epoch: 1, Loss: 4.0482940673828125


Processing epoch 00:  41%|████      | 1994/4850 [09:04<12:58,  3.67it/s]

Epoch: 1, Loss: 4.287132263183594


Processing epoch 00:  41%|████      | 1995/4850 [09:05<12:58,  3.67it/s]

Epoch: 1, Loss: 3.8470795154571533


Processing epoch 00:  41%|████      | 1996/4850 [09:05<12:57,  3.67it/s]

Epoch: 1, Loss: 3.811453342437744


Processing epoch 00:  41%|████      | 1997/4850 [09:05<12:57,  3.67it/s]

Epoch: 1, Loss: 4.082540512084961


Processing epoch 00:  41%|████      | 1998/4850 [09:05<12:55,  3.68it/s]

Epoch: 1, Loss: 4.218205451965332


Processing epoch 00:  41%|████      | 1999/4850 [09:06<12:56,  3.67it/s]

Epoch: 1, Loss: 3.951796293258667


Processing epoch 00:  41%|████      | 2000/4850 [09:06<12:55,  3.67it/s]

Epoch: 1, Loss: 4.125267505645752


Processing epoch 00:  41%|████▏     | 2001/4850 [09:06<13:00,  3.65it/s]

Epoch: 1, Loss: 4.657447814941406


Processing epoch 00:  41%|████▏     | 2002/4850 [09:07<12:58,  3.66it/s]

Epoch: 1, Loss: 4.236447334289551


Processing epoch 00:  41%|████▏     | 2003/4850 [09:07<12:56,  3.67it/s]

Epoch: 1, Loss: 4.62095308303833


Processing epoch 00:  41%|████▏     | 2004/4850 [09:07<13:02,  3.64it/s]

Epoch: 1, Loss: 4.5894012451171875


Processing epoch 00:  41%|████▏     | 2005/4850 [09:07<13:16,  3.57it/s]

Epoch: 1, Loss: 5.002202987670898


Processing epoch 00:  41%|████▏     | 2006/4850 [09:08<13:23,  3.54it/s]

Epoch: 1, Loss: 4.348841667175293


Processing epoch 00:  41%|████▏     | 2007/4850 [09:08<13:21,  3.55it/s]

Epoch: 1, Loss: 4.436929225921631


Processing epoch 00:  41%|████▏     | 2008/4850 [09:08<13:32,  3.50it/s]

Epoch: 1, Loss: 4.039423942565918


Processing epoch 00:  41%|████▏     | 2009/4850 [09:09<13:44,  3.44it/s]

Epoch: 1, Loss: 4.04451322555542


Processing epoch 00:  41%|████▏     | 2010/4850 [09:09<13:43,  3.45it/s]

Epoch: 1, Loss: 4.529759407043457


Processing epoch 00:  41%|████▏     | 2011/4850 [09:09<13:42,  3.45it/s]

Epoch: 1, Loss: 5.1348419189453125


Processing epoch 00:  41%|████▏     | 2012/4850 [09:09<13:53,  3.40it/s]

Epoch: 1, Loss: 3.606724262237549


Processing epoch 00:  42%|████▏     | 2013/4850 [09:10<13:47,  3.43it/s]

Epoch: 1, Loss: 3.641631603240967


Processing epoch 00:  42%|████▏     | 2014/4850 [09:10<13:32,  3.49it/s]

Epoch: 1, Loss: 4.1011061668396


Processing epoch 00:  42%|████▏     | 2015/4850 [09:10<13:18,  3.55it/s]

Epoch: 1, Loss: 3.520590305328369


Processing epoch 00:  42%|████▏     | 2016/4850 [09:11<13:08,  3.59it/s]

Epoch: 1, Loss: 3.29583740234375


Processing epoch 00:  42%|████▏     | 2017/4850 [09:11<13:05,  3.61it/s]

Epoch: 1, Loss: 3.7984752655029297


Processing epoch 00:  42%|████▏     | 2018/4850 [09:11<12:58,  3.64it/s]

Epoch: 1, Loss: 4.17926549911499


Processing epoch 00:  42%|████▏     | 2019/4850 [09:11<12:58,  3.64it/s]

Epoch: 1, Loss: 3.4386789798736572


Processing epoch 00:  42%|████▏     | 2020/4850 [09:12<12:55,  3.65it/s]

Epoch: 1, Loss: 3.8299098014831543


Processing epoch 00:  42%|████▏     | 2021/4850 [09:12<12:52,  3.66it/s]

Epoch: 1, Loss: 3.8422908782958984


Processing epoch 00:  42%|████▏     | 2022/4850 [09:12<12:47,  3.68it/s]

Epoch: 1, Loss: 4.801904201507568


Processing epoch 00:  42%|████▏     | 2023/4850 [09:12<12:46,  3.69it/s]

Epoch: 1, Loss: 4.526998043060303


Processing epoch 00:  42%|████▏     | 2024/4850 [09:13<12:46,  3.69it/s]

Epoch: 1, Loss: 4.74367094039917


Processing epoch 00:  42%|████▏     | 2025/4850 [09:13<12:45,  3.69it/s]

Epoch: 1, Loss: 5.12108039855957


Processing epoch 00:  42%|████▏     | 2026/4850 [09:13<12:43,  3.70it/s]

Epoch: 1, Loss: 4.62735652923584


Processing epoch 00:  42%|████▏     | 2027/4850 [09:14<12:50,  3.67it/s]

Epoch: 1, Loss: 5.2439422607421875


Processing epoch 00:  42%|████▏     | 2028/4850 [09:14<12:48,  3.67it/s]

Epoch: 1, Loss: 4.400000095367432


Processing epoch 00:  42%|████▏     | 2029/4850 [09:14<12:49,  3.67it/s]

Epoch: 1, Loss: 3.383531093597412


Processing epoch 00:  42%|████▏     | 2030/4850 [09:14<12:47,  3.67it/s]

Epoch: 1, Loss: 4.103659629821777


Processing epoch 00:  42%|████▏     | 2031/4850 [09:15<12:51,  3.65it/s]

Epoch: 1, Loss: 3.9085376262664795


Processing epoch 00:  42%|████▏     | 2032/4850 [09:15<12:50,  3.66it/s]

Epoch: 1, Loss: 3.7106194496154785


Processing epoch 00:  42%|████▏     | 2033/4850 [09:15<12:49,  3.66it/s]

Epoch: 1, Loss: 4.465997695922852


Processing epoch 00:  42%|████▏     | 2034/4850 [09:15<12:48,  3.66it/s]

Epoch: 1, Loss: 4.323763847351074


Processing epoch 00:  42%|████▏     | 2035/4850 [09:16<12:47,  3.67it/s]

Epoch: 1, Loss: 3.5982613563537598


Processing epoch 00:  42%|████▏     | 2036/4850 [09:16<12:46,  3.67it/s]

Epoch: 1, Loss: 4.420381546020508


Processing epoch 00:  42%|████▏     | 2037/4850 [09:16<12:42,  3.69it/s]

Epoch: 1, Loss: 4.260049343109131


Processing epoch 00:  42%|████▏     | 2038/4850 [09:17<12:42,  3.69it/s]

Epoch: 1, Loss: 3.7311739921569824


Processing epoch 00:  42%|████▏     | 2039/4850 [09:17<12:43,  3.68it/s]

Epoch: 1, Loss: 4.631205081939697


Processing epoch 00:  42%|████▏     | 2040/4850 [09:17<12:47,  3.66it/s]

Epoch: 1, Loss: 4.326289176940918


Processing epoch 00:  42%|████▏     | 2041/4850 [09:17<12:48,  3.66it/s]

Epoch: 1, Loss: 4.127774238586426


Processing epoch 00:  42%|████▏     | 2042/4850 [09:18<12:52,  3.64it/s]

Epoch: 1, Loss: 3.47843599319458


Processing epoch 00:  42%|████▏     | 2043/4850 [09:18<12:46,  3.66it/s]

Epoch: 1, Loss: 5.054172992706299


Processing epoch 00:  42%|████▏     | 2044/4850 [09:18<12:44,  3.67it/s]

Epoch: 1, Loss: 4.848810195922852


Processing epoch 00:  42%|████▏     | 2045/4850 [09:18<12:45,  3.67it/s]

Epoch: 1, Loss: 4.709336757659912


Processing epoch 00:  42%|████▏     | 2046/4850 [09:19<12:46,  3.66it/s]

Epoch: 1, Loss: 3.6293325424194336


Processing epoch 00:  42%|████▏     | 2047/4850 [09:19<12:41,  3.68it/s]

Epoch: 1, Loss: 5.14804744720459


Processing epoch 00:  42%|████▏     | 2048/4850 [09:19<12:37,  3.70it/s]

Epoch: 1, Loss: 4.496377944946289


Processing epoch 00:  42%|████▏     | 2049/4850 [09:20<12:37,  3.70it/s]

Epoch: 1, Loss: 4.16847038269043


Processing epoch 00:  42%|████▏     | 2050/4850 [09:20<12:42,  3.67it/s]

Epoch: 1, Loss: 3.4409873485565186


Processing epoch 00:  42%|████▏     | 2051/4850 [09:20<13:00,  3.59it/s]

Epoch: 1, Loss: 4.388545036315918


Processing epoch 00:  42%|████▏     | 2052/4850 [09:20<12:59,  3.59it/s]

Epoch: 1, Loss: 5.432614326477051


Processing epoch 00:  42%|████▏     | 2053/4850 [09:21<13:13,  3.53it/s]

Epoch: 1, Loss: 3.756584644317627


Processing epoch 00:  42%|████▏     | 2054/4850 [09:21<13:16,  3.51it/s]

Epoch: 1, Loss: 5.057581901550293


Processing epoch 00:  42%|████▏     | 2055/4850 [09:21<13:17,  3.51it/s]

Epoch: 1, Loss: 4.597558975219727


Processing epoch 00:  42%|████▏     | 2056/4850 [09:22<13:20,  3.49it/s]

Epoch: 1, Loss: 3.7914083003997803


Processing epoch 00:  42%|████▏     | 2057/4850 [09:22<13:32,  3.44it/s]

Epoch: 1, Loss: 4.0127668380737305


Processing epoch 00:  42%|████▏     | 2058/4850 [09:22<13:38,  3.41it/s]

Epoch: 1, Loss: 3.5216476917266846


Processing epoch 00:  42%|████▏     | 2059/4850 [09:22<13:47,  3.37it/s]

Epoch: 1, Loss: 3.5846199989318848


Processing epoch 00:  42%|████▏     | 2060/4850 [09:23<13:38,  3.41it/s]

Epoch: 1, Loss: 4.20596981048584


Processing epoch 00:  42%|████▏     | 2061/4850 [09:23<13:18,  3.49it/s]

Epoch: 1, Loss: 4.700823783874512


Processing epoch 00:  43%|████▎     | 2062/4850 [09:23<13:04,  3.55it/s]

Epoch: 1, Loss: 4.812917232513428


Processing epoch 00:  43%|████▎     | 2063/4850 [09:24<12:59,  3.58it/s]

Epoch: 1, Loss: 3.439100503921509


Processing epoch 00:  43%|████▎     | 2064/4850 [09:24<13:03,  3.56it/s]

Epoch: 1, Loss: 3.570781707763672


Processing epoch 00:  43%|████▎     | 2065/4850 [09:24<12:53,  3.60it/s]

Epoch: 1, Loss: 4.9081525802612305


Processing epoch 00:  43%|████▎     | 2066/4850 [09:24<12:48,  3.62it/s]

Epoch: 1, Loss: 4.839340686798096


Processing epoch 00:  43%|████▎     | 2067/4850 [09:25<12:43,  3.65it/s]

Epoch: 1, Loss: 3.3335506916046143


Processing epoch 00:  43%|████▎     | 2068/4850 [09:25<12:41,  3.65it/s]

Epoch: 1, Loss: 3.4790031909942627


Processing epoch 00:  43%|████▎     | 2069/4850 [09:25<12:37,  3.67it/s]

Epoch: 1, Loss: 4.292135238647461


Processing epoch 00:  43%|████▎     | 2070/4850 [09:25<12:34,  3.69it/s]

Epoch: 1, Loss: 3.8344216346740723


Processing epoch 00:  43%|████▎     | 2071/4850 [09:26<12:34,  3.69it/s]

Epoch: 1, Loss: 4.086613178253174


Processing epoch 00:  43%|████▎     | 2072/4850 [09:26<12:37,  3.67it/s]

Epoch: 1, Loss: 3.1870341300964355


Processing epoch 00:  43%|████▎     | 2073/4850 [09:26<12:36,  3.67it/s]

Epoch: 1, Loss: 4.370677471160889


Processing epoch 00:  43%|████▎     | 2074/4850 [09:27<12:35,  3.68it/s]

Epoch: 1, Loss: 4.772021293640137


Processing epoch 00:  43%|████▎     | 2075/4850 [09:27<12:31,  3.69it/s]

Epoch: 1, Loss: 4.059289932250977


Processing epoch 00:  43%|████▎     | 2076/4850 [09:27<12:31,  3.69it/s]

Epoch: 1, Loss: 4.5022101402282715


Processing epoch 00:  43%|████▎     | 2077/4850 [09:27<12:30,  3.69it/s]

Epoch: 1, Loss: 5.369595527648926


Processing epoch 00:  43%|████▎     | 2078/4850 [09:28<12:37,  3.66it/s]

Epoch: 1, Loss: 4.114612579345703


Processing epoch 00:  43%|████▎     | 2079/4850 [09:28<12:40,  3.64it/s]

Epoch: 1, Loss: 3.9399399757385254


Processing epoch 00:  43%|████▎     | 2080/4850 [09:28<12:36,  3.66it/s]

Epoch: 1, Loss: 3.857013702392578


Processing epoch 00:  43%|████▎     | 2081/4850 [09:28<12:36,  3.66it/s]

Epoch: 1, Loss: 3.499547243118286


Processing epoch 00:  43%|████▎     | 2082/4850 [09:29<12:34,  3.67it/s]

Epoch: 1, Loss: 3.864757537841797


Processing epoch 00:  43%|████▎     | 2083/4850 [09:29<12:34,  3.67it/s]

Epoch: 1, Loss: 3.6678738594055176


Processing epoch 00:  43%|████▎     | 2084/4850 [09:29<12:32,  3.68it/s]

Epoch: 1, Loss: 3.9316458702087402


Processing epoch 00:  43%|████▎     | 2085/4850 [09:30<12:34,  3.66it/s]

Epoch: 1, Loss: 4.1150102615356445


Processing epoch 00:  43%|████▎     | 2086/4850 [09:30<12:33,  3.67it/s]

Epoch: 1, Loss: 4.382911205291748


Processing epoch 00:  43%|████▎     | 2087/4850 [09:30<12:33,  3.67it/s]

Epoch: 1, Loss: 3.6239843368530273


Processing epoch 00:  43%|████▎     | 2088/4850 [09:30<12:30,  3.68it/s]

Epoch: 1, Loss: 4.451594352722168


Processing epoch 00:  43%|████▎     | 2089/4850 [09:31<12:31,  3.68it/s]

Epoch: 1, Loss: 5.256885528564453


Processing epoch 00:  43%|████▎     | 2090/4850 [09:31<12:32,  3.67it/s]

Epoch: 1, Loss: 3.761050224304199


Processing epoch 00:  43%|████▎     | 2091/4850 [09:31<12:28,  3.69it/s]

Epoch: 1, Loss: 4.808765411376953


Processing epoch 00:  43%|████▎     | 2092/4850 [09:31<12:28,  3.68it/s]

Epoch: 1, Loss: 4.481369495391846


Processing epoch 00:  43%|████▎     | 2093/4850 [09:32<12:30,  3.67it/s]

Epoch: 1, Loss: 4.176970958709717


Processing epoch 00:  43%|████▎     | 2094/4850 [09:32<12:35,  3.65it/s]

Epoch: 1, Loss: 3.758664131164551


Processing epoch 00:  43%|████▎     | 2095/4850 [09:32<12:31,  3.67it/s]

Epoch: 1, Loss: 5.238862991333008


Processing epoch 00:  43%|████▎     | 2096/4850 [09:33<12:27,  3.68it/s]

Epoch: 1, Loss: 4.550271034240723


Processing epoch 00:  43%|████▎     | 2097/4850 [09:33<12:27,  3.68it/s]

Epoch: 1, Loss: 4.570547580718994


Processing epoch 00:  43%|████▎     | 2098/4850 [09:33<12:46,  3.59it/s]

Epoch: 1, Loss: 4.611376762390137


Processing epoch 00:  43%|████▎     | 2099/4850 [09:33<13:05,  3.50it/s]

Epoch: 1, Loss: 4.027586936950684


Processing epoch 00:  43%|████▎     | 2100/4850 [09:34<12:57,  3.54it/s]

Epoch: 1, Loss: 5.067774772644043


Processing epoch 00:  43%|████▎     | 2101/4850 [09:34<13:02,  3.51it/s]

Epoch: 1, Loss: 3.4615135192871094


Processing epoch 00:  43%|████▎     | 2102/4850 [09:34<13:03,  3.51it/s]

Epoch: 1, Loss: 3.84696364402771


Processing epoch 00:  43%|████▎     | 2103/4850 [09:35<13:09,  3.48it/s]

Epoch: 1, Loss: 3.672356367111206


Processing epoch 00:  43%|████▎     | 2104/4850 [09:35<13:14,  3.46it/s]

Epoch: 1, Loss: 3.510978937149048


Processing epoch 00:  43%|████▎     | 2105/4850 [09:35<13:21,  3.43it/s]

Epoch: 1, Loss: 3.827694892883301


Processing epoch 00:  43%|████▎     | 2106/4850 [09:35<13:13,  3.46it/s]

Epoch: 1, Loss: 3.5414555072784424


Processing epoch 00:  43%|████▎     | 2107/4850 [09:36<13:13,  3.46it/s]

Epoch: 1, Loss: 4.6286516189575195


Processing epoch 00:  43%|████▎     | 2108/4850 [09:36<12:59,  3.52it/s]

Epoch: 1, Loss: 4.2811079025268555


Processing epoch 00:  43%|████▎     | 2109/4850 [09:36<12:49,  3.56it/s]

Epoch: 1, Loss: 4.470795154571533


Processing epoch 00:  44%|████▎     | 2110/4850 [09:37<12:41,  3.60it/s]

Epoch: 1, Loss: 5.389693260192871


Processing epoch 00:  44%|████▎     | 2111/4850 [09:37<12:37,  3.62it/s]

Epoch: 1, Loss: 3.592813491821289


Processing epoch 00:  44%|████▎     | 2112/4850 [09:37<12:36,  3.62it/s]

Epoch: 1, Loss: 4.52500581741333


Processing epoch 00:  44%|████▎     | 2113/4850 [09:37<12:33,  3.63it/s]

Epoch: 1, Loss: 3.912277936935425


Processing epoch 00:  44%|████▎     | 2114/4850 [09:38<12:39,  3.60it/s]

Epoch: 1, Loss: 3.724839687347412


Processing epoch 00:  44%|████▎     | 2115/4850 [09:38<12:32,  3.63it/s]

Epoch: 1, Loss: 4.232884883880615


Processing epoch 00:  44%|████▎     | 2116/4850 [09:38<12:30,  3.64it/s]

Epoch: 1, Loss: 4.333964824676514


Processing epoch 00:  44%|████▎     | 2117/4850 [09:38<12:28,  3.65it/s]

Epoch: 1, Loss: 3.489452600479126


Processing epoch 00:  44%|████▎     | 2118/4850 [09:39<12:28,  3.65it/s]

Epoch: 1, Loss: 3.8914875984191895


Processing epoch 00:  44%|████▎     | 2119/4850 [09:39<12:24,  3.67it/s]

Epoch: 1, Loss: 4.045502662658691


Processing epoch 00:  44%|████▎     | 2120/4850 [09:39<12:30,  3.64it/s]

Epoch: 1, Loss: 3.724280834197998


Processing epoch 00:  44%|████▎     | 2121/4850 [09:40<12:26,  3.66it/s]

Epoch: 1, Loss: 4.01204252243042


Processing epoch 00:  44%|████▍     | 2122/4850 [09:40<12:25,  3.66it/s]

Epoch: 1, Loss: 3.0672836303710938


Processing epoch 00:  44%|████▍     | 2123/4850 [09:40<12:22,  3.67it/s]

Epoch: 1, Loss: 4.345726013183594


Processing epoch 00:  44%|████▍     | 2124/4850 [09:40<12:21,  3.68it/s]

Epoch: 1, Loss: 4.354783535003662


Processing epoch 00:  44%|████▍     | 2125/4850 [09:41<12:21,  3.68it/s]

Epoch: 1, Loss: 5.263766288757324


Processing epoch 00:  44%|████▍     | 2126/4850 [09:41<12:16,  3.70it/s]

Epoch: 1, Loss: 4.447892189025879


Processing epoch 00:  44%|████▍     | 2127/4850 [09:41<12:17,  3.69it/s]

Epoch: 1, Loss: 4.752089500427246


Processing epoch 00:  44%|████▍     | 2128/4850 [09:41<12:17,  3.69it/s]

Epoch: 1, Loss: 4.075776100158691


Processing epoch 00:  44%|████▍     | 2129/4850 [09:42<12:16,  3.69it/s]

Epoch: 1, Loss: 3.6940760612487793


Processing epoch 00:  44%|████▍     | 2130/4850 [09:42<12:16,  3.69it/s]

Epoch: 1, Loss: 4.022574424743652


Processing epoch 00:  44%|████▍     | 2131/4850 [09:42<12:16,  3.69it/s]

Epoch: 1, Loss: 3.713545322418213


Processing epoch 00:  44%|████▍     | 2132/4850 [09:43<12:17,  3.69it/s]

Epoch: 1, Loss: 3.479191780090332


Processing epoch 00:  44%|████▍     | 2133/4850 [09:43<12:18,  3.68it/s]

Epoch: 1, Loss: 3.3563008308410645


Processing epoch 00:  44%|████▍     | 2134/4850 [09:43<12:18,  3.68it/s]

Epoch: 1, Loss: 3.145845413208008


Processing epoch 00:  44%|████▍     | 2135/4850 [09:43<12:20,  3.67it/s]

Epoch: 1, Loss: 5.1454644203186035


Processing epoch 00:  44%|████▍     | 2136/4850 [09:44<12:19,  3.67it/s]

Epoch: 1, Loss: 4.563755035400391


Processing epoch 00:  44%|████▍     | 2137/4850 [09:44<12:16,  3.68it/s]

Epoch: 1, Loss: 4.4759979248046875


Processing epoch 00:  44%|████▍     | 2138/4850 [09:44<12:15,  3.69it/s]

Epoch: 1, Loss: 4.28108024597168


Processing epoch 00:  44%|████▍     | 2139/4850 [09:44<12:17,  3.68it/s]

Epoch: 1, Loss: 3.5445637702941895


Processing epoch 00:  44%|████▍     | 2140/4850 [09:45<12:16,  3.68it/s]

Epoch: 1, Loss: 3.8947219848632812


Processing epoch 00:  44%|████▍     | 2141/4850 [09:45<12:19,  3.66it/s]

Epoch: 1, Loss: 3.858522415161133


Processing epoch 00:  44%|████▍     | 2142/4850 [09:45<12:19,  3.66it/s]

Epoch: 1, Loss: 4.9257988929748535


Processing epoch 00:  44%|████▍     | 2143/4850 [09:46<12:21,  3.65it/s]

Epoch: 1, Loss: 3.7011399269104004


Processing epoch 00:  44%|████▍     | 2144/4850 [09:46<12:20,  3.65it/s]

Epoch: 1, Loss: 3.8014657497406006


Processing epoch 00:  44%|████▍     | 2145/4850 [09:46<12:37,  3.57it/s]

Epoch: 1, Loss: 3.8829126358032227


Processing epoch 00:  44%|████▍     | 2146/4850 [09:46<12:49,  3.51it/s]

Epoch: 1, Loss: 4.2261271476745605


Processing epoch 00:  44%|████▍     | 2147/4850 [09:47<12:51,  3.50it/s]

Epoch: 1, Loss: 3.345187187194824


Processing epoch 00:  44%|████▍     | 2148/4850 [09:47<12:49,  3.51it/s]

Epoch: 1, Loss: 4.03197717666626


Processing epoch 00:  44%|████▍     | 2149/4850 [09:47<12:49,  3.51it/s]

Epoch: 1, Loss: 3.535630464553833


Processing epoch 00:  44%|████▍     | 2150/4850 [09:48<12:50,  3.50it/s]

Epoch: 1, Loss: 3.418309211730957


Processing epoch 00:  44%|████▍     | 2151/4850 [09:48<12:57,  3.47it/s]

Epoch: 1, Loss: 2.866344928741455


Processing epoch 00:  44%|████▍     | 2152/4850 [09:48<12:56,  3.47it/s]

Epoch: 1, Loss: 3.6471147537231445


Processing epoch 00:  44%|████▍     | 2153/4850 [09:48<13:03,  3.44it/s]

Epoch: 1, Loss: 3.385279893875122


Processing epoch 00:  44%|████▍     | 2154/4850 [09:49<13:08,  3.42it/s]

Epoch: 1, Loss: 3.7282354831695557


Processing epoch 00:  44%|████▍     | 2155/4850 [09:49<12:53,  3.48it/s]

Epoch: 1, Loss: 4.775364875793457


Processing epoch 00:  44%|████▍     | 2156/4850 [09:49<12:43,  3.53it/s]

Epoch: 1, Loss: 3.7101235389709473


Processing epoch 00:  44%|████▍     | 2157/4850 [09:50<12:36,  3.56it/s]

Epoch: 1, Loss: 4.391629219055176


Processing epoch 00:  44%|████▍     | 2158/4850 [09:50<12:42,  3.53it/s]

Epoch: 1, Loss: 4.799928188323975


Processing epoch 00:  45%|████▍     | 2159/4850 [09:50<12:52,  3.48it/s]

Epoch: 1, Loss: 3.5570080280303955


Processing epoch 00:  45%|████▍     | 2160/4850 [09:50<12:59,  3.45it/s]

Epoch: 1, Loss: 3.9571502208709717


Processing epoch 00:  45%|████▍     | 2161/4850 [09:51<13:02,  3.44it/s]

Epoch: 1, Loss: 3.1490421295166016


Processing epoch 00:  45%|████▍     | 2162/4850 [09:51<13:06,  3.42it/s]

Epoch: 1, Loss: 3.4984326362609863


Processing epoch 00:  45%|████▍     | 2163/4850 [09:51<13:03,  3.43it/s]

Epoch: 1, Loss: 3.6676416397094727


Processing epoch 00:  45%|████▍     | 2164/4850 [09:52<13:09,  3.40it/s]

Epoch: 1, Loss: 4.079317569732666


Processing epoch 00:  45%|████▍     | 2165/4850 [09:52<13:13,  3.38it/s]

Epoch: 1, Loss: 4.5724287033081055


Processing epoch 00:  45%|████▍     | 2166/4850 [09:52<13:22,  3.35it/s]

Epoch: 1, Loss: 3.736640453338623


Processing epoch 00:  45%|████▍     | 2167/4850 [09:52<13:14,  3.38it/s]

Epoch: 1, Loss: 5.051091194152832


Processing epoch 00:  45%|████▍     | 2168/4850 [09:53<12:55,  3.46it/s]

Epoch: 1, Loss: 4.02116584777832


Processing epoch 00:  45%|████▍     | 2169/4850 [09:53<12:42,  3.52it/s]

Epoch: 1, Loss: 4.355404853820801


Processing epoch 00:  45%|████▍     | 2170/4850 [09:53<12:30,  3.57it/s]

Epoch: 1, Loss: 3.3786699771881104


Processing epoch 00:  45%|████▍     | 2171/4850 [09:54<12:24,  3.60it/s]

Epoch: 1, Loss: 4.131583213806152


Processing epoch 00:  45%|████▍     | 2172/4850 [09:54<12:19,  3.62it/s]

Epoch: 1, Loss: 3.41998553276062


Processing epoch 00:  45%|████▍     | 2173/4850 [09:54<12:15,  3.64it/s]

Epoch: 1, Loss: 3.4376463890075684


Processing epoch 00:  45%|████▍     | 2174/4850 [09:54<12:11,  3.66it/s]

Epoch: 1, Loss: 4.206031799316406


Processing epoch 00:  45%|████▍     | 2175/4850 [09:55<12:12,  3.65it/s]

Epoch: 1, Loss: 4.184573173522949


Processing epoch 00:  45%|████▍     | 2176/4850 [09:55<12:11,  3.65it/s]

Epoch: 1, Loss: 4.0937581062316895


Processing epoch 00:  45%|████▍     | 2177/4850 [09:55<12:11,  3.66it/s]

Epoch: 1, Loss: 3.7480878829956055


Processing epoch 00:  45%|████▍     | 2178/4850 [09:55<12:12,  3.65it/s]

Epoch: 1, Loss: 3.8902769088745117


Processing epoch 00:  45%|████▍     | 2179/4850 [09:56<12:13,  3.64it/s]

Epoch: 1, Loss: 3.8054728507995605


Processing epoch 00:  45%|████▍     | 2180/4850 [09:56<12:12,  3.65it/s]

Epoch: 1, Loss: 3.7664976119995117


Processing epoch 00:  45%|████▍     | 2181/4850 [09:56<12:09,  3.66it/s]

Epoch: 1, Loss: 4.373295307159424


Processing epoch 00:  45%|████▍     | 2182/4850 [09:57<12:09,  3.66it/s]

Epoch: 1, Loss: 4.1076579093933105


Processing epoch 00:  45%|████▌     | 2183/4850 [09:57<12:14,  3.63it/s]

Epoch: 1, Loss: 3.238628387451172


Processing epoch 00:  45%|████▌     | 2184/4850 [09:57<12:12,  3.64it/s]

Epoch: 1, Loss: 3.244973659515381


Processing epoch 00:  45%|████▌     | 2185/4850 [09:57<12:10,  3.65it/s]

Epoch: 1, Loss: 4.240975379943848


Processing epoch 00:  45%|████▌     | 2186/4850 [09:58<12:11,  3.64it/s]

Epoch: 1, Loss: 3.7237420082092285


Processing epoch 00:  45%|████▌     | 2187/4850 [09:58<12:11,  3.64it/s]

Epoch: 1, Loss: 3.423724889755249


Processing epoch 00:  45%|████▌     | 2188/4850 [09:58<12:12,  3.64it/s]

Epoch: 1, Loss: 3.6885485649108887


Processing epoch 00:  45%|████▌     | 2189/4850 [09:59<12:08,  3.65it/s]

Epoch: 1, Loss: 4.196467399597168


Processing epoch 00:  45%|████▌     | 2190/4850 [09:59<12:09,  3.64it/s]

Epoch: 1, Loss: 3.727390766143799


Processing epoch 00:  45%|████▌     | 2191/4850 [09:59<12:29,  3.55it/s]

Epoch: 1, Loss: 3.453204870223999


Processing epoch 00:  45%|████▌     | 2192/4850 [09:59<12:30,  3.54it/s]

Epoch: 1, Loss: 3.7066149711608887


Processing epoch 00:  45%|████▌     | 2193/4850 [10:00<12:46,  3.47it/s]

Epoch: 1, Loss: 3.748518943786621


Processing epoch 00:  45%|████▌     | 2194/4850 [10:00<12:50,  3.45it/s]

Epoch: 1, Loss: 4.482877731323242


Processing epoch 00:  45%|████▌     | 2195/4850 [10:00<12:43,  3.48it/s]

Epoch: 1, Loss: 4.064859390258789


Processing epoch 00:  45%|████▌     | 2196/4850 [10:01<12:52,  3.44it/s]

Epoch: 1, Loss: 4.394935607910156


Processing epoch 00:  45%|████▌     | 2197/4850 [10:01<12:52,  3.44it/s]

Epoch: 1, Loss: 3.863926649093628


Processing epoch 00:  45%|████▌     | 2198/4850 [10:01<12:54,  3.42it/s]

Epoch: 1, Loss: 3.7813644409179688


Processing epoch 00:  45%|████▌     | 2199/4850 [10:01<12:50,  3.44it/s]

Epoch: 1, Loss: 4.4170756340026855


Processing epoch 00:  45%|████▌     | 2200/4850 [10:02<12:46,  3.46it/s]

Epoch: 1, Loss: 5.000391960144043


Processing epoch 00:  45%|████▌     | 2201/4850 [10:02<12:35,  3.51it/s]

Epoch: 1, Loss: 3.5801048278808594


Processing epoch 00:  45%|████▌     | 2202/4850 [10:02<12:23,  3.56it/s]

Epoch: 1, Loss: 3.633469820022583


Processing epoch 00:  45%|████▌     | 2203/4850 [10:03<12:15,  3.60it/s]

Epoch: 1, Loss: 3.626708984375


Processing epoch 00:  45%|████▌     | 2204/4850 [10:03<12:10,  3.62it/s]

Epoch: 1, Loss: 3.634686231613159


Processing epoch 00:  45%|████▌     | 2205/4850 [10:03<12:18,  3.58it/s]

Epoch: 1, Loss: 3.2145628929138184


Processing epoch 00:  45%|████▌     | 2206/4850 [10:03<12:14,  3.60it/s]

Epoch: 1, Loss: 3.753969430923462


Processing epoch 00:  46%|████▌     | 2207/4850 [10:04<12:11,  3.62it/s]

Epoch: 1, Loss: 3.631880760192871


Processing epoch 00:  46%|████▌     | 2208/4850 [10:04<12:07,  3.63it/s]

Epoch: 1, Loss: 4.680303573608398


Processing epoch 00:  46%|████▌     | 2209/4850 [10:04<12:02,  3.65it/s]

Epoch: 1, Loss: 3.786149501800537


Processing epoch 00:  46%|████▌     | 2210/4850 [10:04<12:03,  3.65it/s]

Epoch: 1, Loss: 3.869845390319824


Processing epoch 00:  46%|████▌     | 2211/4850 [10:05<12:02,  3.66it/s]

Epoch: 1, Loss: 4.602190971374512


Processing epoch 00:  46%|████▌     | 2212/4850 [10:05<12:02,  3.65it/s]

Epoch: 1, Loss: 4.685004234313965


Processing epoch 00:  46%|████▌     | 2213/4850 [10:05<12:04,  3.64it/s]

Epoch: 1, Loss: 4.348974227905273


Processing epoch 00:  46%|████▌     | 2214/4850 [10:06<12:01,  3.65it/s]

Epoch: 1, Loss: 4.397140026092529


Processing epoch 00:  46%|████▌     | 2215/4850 [10:06<12:01,  3.65it/s]

Epoch: 1, Loss: 4.457220077514648


Processing epoch 00:  46%|████▌     | 2216/4850 [10:06<12:01,  3.65it/s]

Epoch: 1, Loss: 4.309691905975342


Processing epoch 00:  46%|████▌     | 2217/4850 [10:06<11:59,  3.66it/s]

Epoch: 1, Loss: 4.023947715759277


Processing epoch 00:  46%|████▌     | 2218/4850 [10:07<11:59,  3.66it/s]

Epoch: 1, Loss: 4.033810615539551


Processing epoch 00:  46%|████▌     | 2219/4850 [10:07<11:55,  3.67it/s]

Epoch: 1, Loss: 3.550954580307007


Processing epoch 00:  46%|████▌     | 2220/4850 [10:07<12:00,  3.65it/s]

Epoch: 1, Loss: 4.7869696617126465


Processing epoch 00:  46%|████▌     | 2221/4850 [10:07<12:01,  3.64it/s]

Epoch: 1, Loss: 3.7235589027404785


Processing epoch 00:  46%|████▌     | 2222/4850 [10:08<11:59,  3.65it/s]

Epoch: 1, Loss: 3.9311037063598633


Processing epoch 00:  46%|████▌     | 2223/4850 [10:08<11:57,  3.66it/s]

Epoch: 1, Loss: 3.6142823696136475


Processing epoch 00:  46%|████▌     | 2224/4850 [10:08<11:56,  3.67it/s]

Epoch: 1, Loss: 3.852586269378662


Processing epoch 00:  46%|████▌     | 2225/4850 [10:09<11:59,  3.65it/s]

Epoch: 1, Loss: 3.577033281326294


Processing epoch 00:  46%|████▌     | 2226/4850 [10:09<11:58,  3.65it/s]

Epoch: 1, Loss: 4.2255401611328125


Processing epoch 00:  46%|████▌     | 2227/4850 [10:09<11:54,  3.67it/s]

Epoch: 1, Loss: 4.151872634887695


Processing epoch 00:  46%|████▌     | 2228/4850 [10:09<11:51,  3.68it/s]

Epoch: 1, Loss: 4.374884128570557


Processing epoch 00:  46%|████▌     | 2229/4850 [10:10<11:51,  3.69it/s]

Epoch: 1, Loss: 4.118628025054932


Processing epoch 00:  46%|████▌     | 2230/4850 [10:10<11:52,  3.68it/s]

Epoch: 1, Loss: 3.6617562770843506


Processing epoch 00:  46%|████▌     | 2231/4850 [10:10<11:51,  3.68it/s]

Epoch: 1, Loss: 5.053513050079346


Processing epoch 00:  46%|████▌     | 2232/4850 [10:10<11:50,  3.69it/s]

Epoch: 1, Loss: 4.301093578338623


Processing epoch 00:  46%|████▌     | 2233/4850 [10:11<11:50,  3.69it/s]

Epoch: 1, Loss: 3.5459651947021484


Processing epoch 00:  46%|████▌     | 2234/4850 [10:11<11:51,  3.68it/s]

Epoch: 1, Loss: 3.9080591201782227


Processing epoch 00:  46%|████▌     | 2235/4850 [10:11<12:00,  3.63it/s]

Epoch: 1, Loss: 3.2177207469940186


Processing epoch 00:  46%|████▌     | 2236/4850 [10:12<11:56,  3.65it/s]

Epoch: 1, Loss: 2.7473270893096924


Processing epoch 00:  46%|████▌     | 2237/4850 [10:12<12:03,  3.61it/s]

Epoch: 1, Loss: 4.7442545890808105


Processing epoch 00:  46%|████▌     | 2238/4850 [10:12<12:17,  3.54it/s]

Epoch: 1, Loss: 3.252211570739746


Processing epoch 00:  46%|████▌     | 2239/4850 [10:12<12:19,  3.53it/s]

Epoch: 1, Loss: 4.065375804901123


Processing epoch 00:  46%|████▌     | 2240/4850 [10:13<12:30,  3.48it/s]

Epoch: 1, Loss: 3.2708089351654053


Processing epoch 00:  46%|████▌     | 2241/4850 [10:13<12:40,  3.43it/s]

Epoch: 1, Loss: 3.586169719696045


Processing epoch 00:  46%|████▌     | 2242/4850 [10:13<12:35,  3.45it/s]

Epoch: 1, Loss: 5.266110897064209


Processing epoch 00:  46%|████▌     | 2243/4850 [10:14<12:42,  3.42it/s]

Epoch: 1, Loss: 4.19684362411499


Processing epoch 00:  46%|████▋     | 2244/4850 [10:14<12:43,  3.42it/s]

Epoch: 1, Loss: 5.040768146514893


Processing epoch 00:  46%|████▋     | 2245/4850 [10:14<12:47,  3.39it/s]

Epoch: 1, Loss: 3.7263338565826416


Processing epoch 00:  46%|████▋     | 2246/4850 [10:14<12:54,  3.36it/s]

Epoch: 1, Loss: 4.716684818267822


Processing epoch 00:  46%|████▋     | 2247/4850 [10:15<12:34,  3.45it/s]

Epoch: 1, Loss: 4.749382972717285


Processing epoch 00:  46%|████▋     | 2248/4850 [10:15<12:23,  3.50it/s]

Epoch: 1, Loss: 3.917133331298828


Processing epoch 00:  46%|████▋     | 2249/4850 [10:15<12:12,  3.55it/s]

Epoch: 1, Loss: 3.739643096923828


Processing epoch 00:  46%|████▋     | 2250/4850 [10:16<12:06,  3.58it/s]

Epoch: 1, Loss: 3.825639486312866


Processing epoch 00:  46%|████▋     | 2251/4850 [10:16<12:01,  3.60it/s]

Epoch: 1, Loss: 3.5186662673950195


Processing epoch 00:  46%|████▋     | 2252/4850 [10:16<11:55,  3.63it/s]

Epoch: 1, Loss: 4.346133232116699


Processing epoch 00:  46%|████▋     | 2253/4850 [10:16<11:57,  3.62it/s]

Epoch: 1, Loss: 3.7945685386657715


Processing epoch 00:  46%|████▋     | 2254/4850 [10:17<11:52,  3.64it/s]

Epoch: 1, Loss: 4.244658470153809


Processing epoch 00:  46%|████▋     | 2255/4850 [10:17<11:50,  3.65it/s]

Epoch: 1, Loss: 4.229677200317383


Processing epoch 00:  47%|████▋     | 2256/4850 [10:17<11:49,  3.66it/s]

Epoch: 1, Loss: 4.253902435302734


Processing epoch 00:  47%|████▋     | 2257/4850 [10:17<11:51,  3.65it/s]

Epoch: 1, Loss: 4.3607916831970215


Processing epoch 00:  47%|████▋     | 2258/4850 [10:18<11:50,  3.65it/s]

Epoch: 1, Loss: 4.0451979637146


Processing epoch 00:  47%|████▋     | 2259/4850 [10:18<11:49,  3.65it/s]

Epoch: 1, Loss: 3.8412656784057617


Processing epoch 00:  47%|████▋     | 2260/4850 [10:18<11:48,  3.66it/s]

Epoch: 1, Loss: 3.1613354682922363


Processing epoch 00:  47%|████▋     | 2261/4850 [10:19<11:46,  3.67it/s]

Epoch: 1, Loss: 3.681173801422119


Processing epoch 00:  47%|████▋     | 2262/4850 [10:19<11:46,  3.66it/s]

Epoch: 1, Loss: 3.488832712173462


Processing epoch 00:  47%|████▋     | 2263/4850 [10:19<11:45,  3.67it/s]

Epoch: 1, Loss: 3.721285104751587


Processing epoch 00:  47%|████▋     | 2264/4850 [10:19<11:43,  3.67it/s]

Epoch: 1, Loss: 3.284717559814453


Processing epoch 00:  47%|████▋     | 2265/4850 [10:20<11:47,  3.66it/s]

Epoch: 1, Loss: 3.7230300903320312


Processing epoch 00:  47%|████▋     | 2266/4850 [10:20<11:43,  3.67it/s]

Epoch: 1, Loss: 5.622860908508301


Processing epoch 00:  47%|████▋     | 2267/4850 [10:20<11:42,  3.68it/s]

Epoch: 1, Loss: 4.823916912078857


Processing epoch 00:  47%|████▋     | 2268/4850 [10:20<11:42,  3.68it/s]

Epoch: 1, Loss: 4.634361267089844


Processing epoch 00:  47%|████▋     | 2269/4850 [10:21<11:39,  3.69it/s]

Epoch: 1, Loss: 3.811504364013672


Processing epoch 00:  47%|████▋     | 2270/4850 [10:21<11:40,  3.68it/s]

Epoch: 1, Loss: 2.8231546878814697


Processing epoch 00:  47%|████▋     | 2271/4850 [10:21<11:41,  3.68it/s]

Epoch: 1, Loss: 3.728178024291992


Processing epoch 00:  47%|████▋     | 2272/4850 [10:22<11:48,  3.64it/s]

Epoch: 1, Loss: 3.322234869003296


Processing epoch 00:  47%|████▋     | 2273/4850 [10:22<11:46,  3.65it/s]

Epoch: 1, Loss: 4.015105247497559


Processing epoch 00:  47%|████▋     | 2274/4850 [10:22<11:43,  3.66it/s]

Epoch: 1, Loss: 4.1272873878479


Processing epoch 00:  47%|████▋     | 2275/4850 [10:22<11:41,  3.67it/s]

Epoch: 1, Loss: 3.8331494331359863


Processing epoch 00:  47%|████▋     | 2276/4850 [10:23<11:43,  3.66it/s]

Epoch: 1, Loss: 3.4870083332061768


Processing epoch 00:  47%|████▋     | 2277/4850 [10:23<11:41,  3.67it/s]

Epoch: 1, Loss: 3.8910927772521973


Processing epoch 00:  47%|████▋     | 2278/4850 [10:23<11:41,  3.67it/s]

Epoch: 1, Loss: 4.163212299346924


Processing epoch 00:  47%|████▋     | 2279/4850 [10:23<11:38,  3.68it/s]

Epoch: 1, Loss: 4.853131294250488


Processing epoch 00:  47%|████▋     | 2280/4850 [10:24<11:38,  3.68it/s]

Epoch: 1, Loss: 3.982846260070801


Processing epoch 00:  47%|████▋     | 2281/4850 [10:24<11:38,  3.68it/s]

Epoch: 1, Loss: 4.132328510284424


Processing epoch 00:  47%|████▋     | 2282/4850 [10:24<11:35,  3.69it/s]

Epoch: 1, Loss: 4.742698669433594


Processing epoch 00:  47%|████▋     | 2283/4850 [10:25<11:39,  3.67it/s]

Epoch: 1, Loss: 4.912296295166016


Processing epoch 00:  47%|████▋     | 2284/4850 [10:25<11:52,  3.60it/s]

Epoch: 1, Loss: 3.99943470954895


Processing epoch 00:  47%|████▋     | 2285/4850 [10:25<12:07,  3.53it/s]

Epoch: 1, Loss: 3.3050365447998047


Processing epoch 00:  47%|████▋     | 2286/4850 [10:25<12:12,  3.50it/s]

Epoch: 1, Loss: 3.9978318214416504


Processing epoch 00:  47%|████▋     | 2287/4850 [10:26<12:13,  3.50it/s]

Epoch: 1, Loss: 3.6832926273345947


Processing epoch 00:  47%|████▋     | 2288/4850 [10:26<12:09,  3.51it/s]

Epoch: 1, Loss: 3.5085930824279785


Processing epoch 00:  47%|████▋     | 2289/4850 [10:26<12:14,  3.49it/s]

Epoch: 1, Loss: 3.874187469482422


Processing epoch 00:  47%|████▋     | 2290/4850 [10:27<12:24,  3.44it/s]

Epoch: 1, Loss: 3.9702646732330322


Processing epoch 00:  47%|████▋     | 2291/4850 [10:27<12:31,  3.40it/s]

Epoch: 1, Loss: 3.64603590965271


Processing epoch 00:  47%|████▋     | 2292/4850 [10:27<12:35,  3.38it/s]

Epoch: 1, Loss: 4.653494358062744


Processing epoch 00:  47%|████▋     | 2293/4850 [10:28<12:46,  3.33it/s]

Epoch: 1, Loss: 3.8445494174957275


Processing epoch 00:  47%|████▋     | 2294/4850 [10:28<12:25,  3.43it/s]

Epoch: 1, Loss: 3.781048536300659


Processing epoch 00:  47%|████▋     | 2295/4850 [10:28<12:11,  3.49it/s]

Epoch: 1, Loss: 3.238786220550537


Processing epoch 00:  47%|████▋     | 2296/4850 [10:28<12:00,  3.54it/s]

Epoch: 1, Loss: 4.346901893615723


Processing epoch 00:  47%|████▋     | 2297/4850 [10:29<11:49,  3.60it/s]

Epoch: 1, Loss: 3.920398235321045


Processing epoch 00:  47%|████▋     | 2298/4850 [10:29<11:43,  3.63it/s]

Epoch: 1, Loss: 3.4864025115966797


Processing epoch 00:  47%|████▋     | 2299/4850 [10:29<11:49,  3.60it/s]

Epoch: 1, Loss: 3.7829995155334473


Processing epoch 00:  47%|████▋     | 2300/4850 [10:29<11:45,  3.62it/s]

Epoch: 1, Loss: 4.948321342468262


Processing epoch 00:  47%|████▋     | 2301/4850 [10:30<11:42,  3.63it/s]

Epoch: 1, Loss: 4.5652360916137695


Processing epoch 00:  47%|████▋     | 2302/4850 [10:30<11:42,  3.63it/s]

Epoch: 1, Loss: 4.001924991607666


Processing epoch 00:  47%|████▋     | 2303/4850 [10:30<11:39,  3.64it/s]

Epoch: 1, Loss: 3.2450947761535645


Processing epoch 00:  48%|████▊     | 2304/4850 [10:31<11:37,  3.65it/s]

Epoch: 1, Loss: 3.6573686599731445


Processing epoch 00:  48%|████▊     | 2305/4850 [10:31<11:37,  3.65it/s]

Epoch: 1, Loss: 3.2126686573028564


Processing epoch 00:  48%|████▊     | 2306/4850 [10:31<11:37,  3.65it/s]

Epoch: 1, Loss: 3.9674720764160156


Processing epoch 00:  48%|████▊     | 2307/4850 [10:31<11:35,  3.66it/s]

Epoch: 1, Loss: 3.9157450199127197


Processing epoch 00:  48%|████▊     | 2308/4850 [10:32<11:35,  3.65it/s]

Epoch: 1, Loss: 2.897099256515503


Processing epoch 00:  48%|████▊     | 2309/4850 [10:32<11:43,  3.61it/s]

Epoch: 1, Loss: 3.7150094509124756


Processing epoch 00:  48%|████▊     | 2310/4850 [10:32<11:39,  3.63it/s]

Epoch: 1, Loss: 3.3718700408935547


Processing epoch 00:  48%|████▊     | 2311/4850 [10:32<11:37,  3.64it/s]

Epoch: 1, Loss: 3.429309844970703


Processing epoch 00:  48%|████▊     | 2312/4850 [10:33<11:36,  3.65it/s]

Epoch: 1, Loss: 3.459151268005371


Processing epoch 00:  48%|████▊     | 2313/4850 [10:33<11:35,  3.65it/s]

Epoch: 1, Loss: 2.7380261421203613


Processing epoch 00:  48%|████▊     | 2314/4850 [10:33<11:33,  3.66it/s]

Epoch: 1, Loss: 3.720874786376953


Processing epoch 00:  48%|████▊     | 2315/4850 [10:34<11:33,  3.66it/s]

Epoch: 1, Loss: 3.951512336730957


Processing epoch 00:  48%|████▊     | 2316/4850 [10:34<11:32,  3.66it/s]

Epoch: 1, Loss: 3.3664145469665527


Processing epoch 00:  48%|████▊     | 2317/4850 [10:34<11:30,  3.67it/s]

Epoch: 1, Loss: 4.024991035461426


Processing epoch 00:  48%|████▊     | 2318/4850 [10:34<11:27,  3.68it/s]

Epoch: 1, Loss: 4.975865364074707


Processing epoch 00:  48%|████▊     | 2319/4850 [10:35<11:27,  3.68it/s]

Epoch: 1, Loss: 3.9146344661712646


Processing epoch 00:  48%|████▊     | 2320/4850 [10:35<11:25,  3.69it/s]

Epoch: 1, Loss: 4.635165214538574


Processing epoch 00:  48%|████▊     | 2321/4850 [10:35<11:26,  3.69it/s]

Epoch: 1, Loss: 4.157299995422363


Processing epoch 00:  48%|████▊     | 2322/4850 [10:35<11:26,  3.68it/s]

Epoch: 1, Loss: 4.60163688659668


Processing epoch 00:  48%|████▊     | 2323/4850 [10:36<11:25,  3.69it/s]

Epoch: 1, Loss: 4.291131019592285


Processing epoch 00:  48%|████▊     | 2324/4850 [10:36<11:29,  3.67it/s]

Epoch: 1, Loss: 3.839275598526001


Processing epoch 00:  48%|████▊     | 2325/4850 [10:36<11:30,  3.66it/s]

Epoch: 1, Loss: 3.3447556495666504


Processing epoch 00:  48%|████▊     | 2326/4850 [10:37<11:28,  3.67it/s]

Epoch: 1, Loss: 3.4822964668273926


Processing epoch 00:  48%|████▊     | 2327/4850 [10:37<11:28,  3.66it/s]

Epoch: 1, Loss: 3.98511004447937


Processing epoch 00:  48%|████▊     | 2328/4850 [10:37<11:27,  3.67it/s]

Epoch: 1, Loss: 3.790656089782715


Processing epoch 00:  48%|████▊     | 2329/4850 [10:37<11:27,  3.67it/s]

Epoch: 1, Loss: 3.585538864135742


Processing epoch 00:  48%|████▊     | 2330/4850 [10:38<11:43,  3.58it/s]

Epoch: 1, Loss: 4.541135787963867


Processing epoch 00:  48%|████▊     | 2331/4850 [10:38<11:48,  3.55it/s]

Epoch: 1, Loss: 3.796658992767334


Processing epoch 00:  48%|████▊     | 2332/4850 [10:38<11:53,  3.53it/s]

Epoch: 1, Loss: 3.152557373046875


Processing epoch 00:  48%|████▊     | 2333/4850 [10:39<11:56,  3.51it/s]

Epoch: 1, Loss: 4.229219436645508


Processing epoch 00:  48%|████▊     | 2334/4850 [10:39<12:02,  3.48it/s]

Epoch: 1, Loss: 4.1759443283081055


Processing epoch 00:  48%|████▊     | 2335/4850 [10:39<12:14,  3.43it/s]

Epoch: 1, Loss: 3.451482057571411


Processing epoch 00:  48%|████▊     | 2336/4850 [10:39<12:19,  3.40it/s]

Epoch: 1, Loss: 3.508190155029297


Processing epoch 00:  48%|████▊     | 2337/4850 [10:40<12:32,  3.34it/s]

Epoch: 1, Loss: 4.488307476043701


Processing epoch 00:  48%|████▊     | 2338/4850 [10:40<12:27,  3.36it/s]

Epoch: 1, Loss: 3.509758949279785


Processing epoch 00:  48%|████▊     | 2339/4850 [10:40<12:21,  3.39it/s]

Epoch: 1, Loss: 3.6517410278320312


Processing epoch 00:  48%|████▊     | 2340/4850 [10:41<12:32,  3.33it/s]

Epoch: 1, Loss: 4.169734477996826


Processing epoch 00:  48%|████▊     | 2341/4850 [10:41<12:11,  3.43it/s]

Epoch: 1, Loss: 3.861199378967285


Processing epoch 00:  48%|████▊     | 2342/4850 [10:41<11:57,  3.49it/s]

Epoch: 1, Loss: 3.364316940307617


Processing epoch 00:  48%|████▊     | 2343/4850 [10:41<11:47,  3.54it/s]

Epoch: 1, Loss: 4.240532875061035


Processing epoch 00:  48%|████▊     | 2344/4850 [10:42<11:38,  3.59it/s]

Epoch: 1, Loss: 3.713198661804199


Processing epoch 00:  48%|████▊     | 2345/4850 [10:42<11:33,  3.61it/s]

Epoch: 1, Loss: 3.6102614402770996


Processing epoch 00:  48%|████▊     | 2346/4850 [10:42<11:27,  3.64it/s]

Epoch: 1, Loss: 3.8200454711914062


Processing epoch 00:  48%|████▊     | 2347/4850 [10:43<11:24,  3.66it/s]

Epoch: 1, Loss: 4.53512716293335


Processing epoch 00:  48%|████▊     | 2348/4850 [10:43<11:28,  3.63it/s]

Epoch: 1, Loss: 3.379911422729492


Processing epoch 00:  48%|████▊     | 2349/4850 [10:43<11:26,  3.64it/s]

Epoch: 1, Loss: 2.9147279262542725


Processing epoch 00:  48%|████▊     | 2350/4850 [10:43<11:22,  3.66it/s]

Epoch: 1, Loss: 4.224626541137695


Processing epoch 00:  48%|████▊     | 2351/4850 [10:44<11:22,  3.66it/s]

Epoch: 1, Loss: 2.9985947608947754


Processing epoch 00:  48%|████▊     | 2352/4850 [10:44<11:21,  3.66it/s]

Epoch: 1, Loss: 3.717905044555664


Processing epoch 00:  49%|████▊     | 2353/4850 [10:44<11:21,  3.66it/s]

Epoch: 1, Loss: 3.3675684928894043


Processing epoch 00:  49%|████▊     | 2354/4850 [10:44<11:19,  3.67it/s]

Epoch: 1, Loss: 4.670636177062988


Processing epoch 00:  49%|████▊     | 2355/4850 [10:45<11:19,  3.67it/s]

Epoch: 1, Loss: 4.16996955871582


Processing epoch 00:  49%|████▊     | 2356/4850 [10:45<11:19,  3.67it/s]

Epoch: 1, Loss: 4.179325103759766


Processing epoch 00:  49%|████▊     | 2357/4850 [10:45<11:22,  3.65it/s]

Epoch: 1, Loss: 4.185808181762695


Processing epoch 00:  49%|████▊     | 2358/4850 [10:46<11:21,  3.66it/s]

Epoch: 1, Loss: 4.055686950683594


Processing epoch 00:  49%|████▊     | 2359/4850 [10:46<11:20,  3.66it/s]

Epoch: 1, Loss: 3.524705648422241


Processing epoch 00:  49%|████▊     | 2360/4850 [10:46<11:21,  3.65it/s]

Epoch: 1, Loss: 4.1871867179870605


Processing epoch 00:  49%|████▊     | 2361/4850 [10:46<11:22,  3.65it/s]

Epoch: 1, Loss: 3.7521345615386963


Processing epoch 00:  49%|████▊     | 2362/4850 [10:47<11:23,  3.64it/s]

Epoch: 1, Loss: 3.3431601524353027


Processing epoch 00:  49%|████▊     | 2363/4850 [10:47<11:21,  3.65it/s]

Epoch: 1, Loss: 2.75635027885437


Processing epoch 00:  49%|████▊     | 2364/4850 [10:47<11:22,  3.64it/s]

Epoch: 1, Loss: 3.5517053604125977


Processing epoch 00:  49%|████▉     | 2365/4850 [10:47<11:22,  3.64it/s]

Epoch: 1, Loss: 3.2954039573669434


Processing epoch 00:  49%|████▉     | 2366/4850 [10:48<11:18,  3.66it/s]

Epoch: 1, Loss: 4.32484769821167


Processing epoch 00:  49%|████▉     | 2367/4850 [10:48<11:18,  3.66it/s]

Epoch: 1, Loss: 3.4661412239074707


Processing epoch 00:  49%|████▉     | 2368/4850 [10:48<11:16,  3.67it/s]

Epoch: 1, Loss: 5.055970668792725


Processing epoch 00:  49%|████▉     | 2369/4850 [10:49<11:14,  3.68it/s]

Epoch: 1, Loss: 3.199275016784668


Processing epoch 00:  49%|████▉     | 2370/4850 [10:49<11:16,  3.67it/s]

Epoch: 1, Loss: 3.6129322052001953


Processing epoch 00:  49%|████▉     | 2371/4850 [10:49<11:13,  3.68it/s]

Epoch: 1, Loss: 3.6633059978485107


Processing epoch 00:  49%|████▉     | 2372/4850 [10:49<11:19,  3.65it/s]

Epoch: 1, Loss: 3.751283645629883


Processing epoch 00:  49%|████▉     | 2373/4850 [10:50<11:26,  3.61it/s]

Epoch: 1, Loss: 3.52506685256958


Processing epoch 00:  49%|████▉     | 2374/4850 [10:50<11:21,  3.63it/s]

Epoch: 1, Loss: 3.9078924655914307


Processing epoch 00:  49%|████▉     | 2375/4850 [10:50<11:20,  3.64it/s]

Epoch: 1, Loss: 3.392622947692871


Processing epoch 00:  49%|████▉     | 2376/4850 [10:50<11:19,  3.64it/s]

Epoch: 1, Loss: 4.094487190246582


Processing epoch 00:  49%|████▉     | 2377/4850 [10:51<11:28,  3.59it/s]

Epoch: 1, Loss: 3.8578152656555176


Processing epoch 00:  49%|████▉     | 2378/4850 [10:51<11:37,  3.55it/s]

Epoch: 1, Loss: 4.430243015289307


Processing epoch 00:  49%|████▉     | 2379/4850 [10:51<11:46,  3.50it/s]

Epoch: 1, Loss: 3.2445545196533203


Processing epoch 00:  49%|████▉     | 2380/4850 [10:52<11:54,  3.46it/s]

Epoch: 1, Loss: 3.3389906883239746


Processing epoch 00:  49%|████▉     | 2381/4850 [10:52<11:46,  3.49it/s]

Epoch: 1, Loss: 3.716552734375


Processing epoch 00:  49%|████▉     | 2382/4850 [10:52<11:44,  3.50it/s]

Epoch: 1, Loss: 3.2780795097351074


Processing epoch 00:  49%|████▉     | 2383/4850 [10:52<11:55,  3.45it/s]

Epoch: 1, Loss: 3.7962536811828613


Processing epoch 00:  49%|████▉     | 2384/4850 [10:53<11:59,  3.43it/s]

Epoch: 1, Loss: 3.260615110397339


Processing epoch 00:  49%|████▉     | 2385/4850 [10:53<12:18,  3.34it/s]

Epoch: 1, Loss: 4.04256534576416


Processing epoch 00:  49%|████▉     | 2386/4850 [10:53<12:06,  3.39it/s]

Epoch: 1, Loss: 3.740042209625244


Processing epoch 00:  49%|████▉     | 2387/4850 [10:54<12:06,  3.39it/s]

Epoch: 1, Loss: 3.3626885414123535


Processing epoch 00:  49%|████▉     | 2388/4850 [10:54<11:49,  3.47it/s]

Epoch: 1, Loss: 3.347784996032715


Processing epoch 00:  49%|████▉     | 2389/4850 [10:54<11:37,  3.53it/s]

Epoch: 1, Loss: 3.8822107315063477


Processing epoch 00:  49%|████▉     | 2390/4850 [10:55<11:30,  3.56it/s]

Epoch: 1, Loss: 3.475491523742676


Processing epoch 00:  49%|████▉     | 2391/4850 [10:55<11:21,  3.61it/s]

Epoch: 1, Loss: 3.1052322387695312


Processing epoch 00:  49%|████▉     | 2392/4850 [10:55<11:18,  3.62it/s]

Epoch: 1, Loss: 4.073463439941406


Processing epoch 00:  49%|████▉     | 2393/4850 [10:55<11:13,  3.65it/s]

Epoch: 1, Loss: 3.415525436401367


Processing epoch 00:  49%|████▉     | 2394/4850 [10:56<11:23,  3.59it/s]

Epoch: 1, Loss: 3.153132915496826


Processing epoch 00:  49%|████▉     | 2395/4850 [10:56<11:18,  3.62it/s]

Epoch: 1, Loss: 3.894929885864258


Processing epoch 00:  49%|████▉     | 2396/4850 [10:56<11:14,  3.64it/s]

Epoch: 1, Loss: 4.140819549560547


Processing epoch 00:  49%|████▉     | 2397/4850 [10:56<11:13,  3.64it/s]

Epoch: 1, Loss: 3.464317560195923


Processing epoch 00:  49%|████▉     | 2398/4850 [10:57<11:10,  3.66it/s]

Epoch: 1, Loss: 3.774905204772949


Processing epoch 00:  49%|████▉     | 2399/4850 [10:57<11:09,  3.66it/s]

Epoch: 1, Loss: 3.7908899784088135


Processing epoch 00:  49%|████▉     | 2400/4850 [10:57<11:08,  3.67it/s]

Epoch: 1, Loss: 3.2807672023773193


Processing epoch 00:  50%|████▉     | 2401/4850 [10:58<11:07,  3.67it/s]

Epoch: 1, Loss: 3.752657175064087


Processing epoch 00:  50%|████▉     | 2402/4850 [10:58<11:07,  3.67it/s]

Epoch: 1, Loss: 3.947455644607544


Processing epoch 00:  50%|████▉     | 2403/4850 [10:58<11:10,  3.65it/s]

Epoch: 1, Loss: 4.054462909698486


Processing epoch 00:  50%|████▉     | 2404/4850 [10:58<11:09,  3.65it/s]

Epoch: 1, Loss: 3.23923921585083


Processing epoch 00:  50%|████▉     | 2405/4850 [10:59<11:09,  3.65it/s]

Epoch: 1, Loss: 4.1238627433776855


Processing epoch 00:  50%|████▉     | 2406/4850 [10:59<11:10,  3.64it/s]

Epoch: 1, Loss: 4.148773193359375


Processing epoch 00:  50%|████▉     | 2407/4850 [10:59<11:11,  3.64it/s]

Epoch: 1, Loss: 4.083865165710449


Processing epoch 00:  50%|████▉     | 2408/4850 [10:59<11:12,  3.63it/s]

Epoch: 1, Loss: 3.619218349456787


Processing epoch 00:  50%|████▉     | 2409/4850 [11:00<11:09,  3.65it/s]

Epoch: 1, Loss: 3.1900830268859863


Processing epoch 00:  50%|████▉     | 2410/4850 [11:00<11:10,  3.64it/s]

Epoch: 1, Loss: 3.179446220397949


Processing epoch 00:  50%|████▉     | 2411/4850 [11:00<11:07,  3.65it/s]

Epoch: 1, Loss: 4.163323879241943


Processing epoch 00:  50%|████▉     | 2412/4850 [11:01<11:02,  3.68it/s]

Epoch: 1, Loss: 4.6326212882995605


Processing epoch 00:  50%|████▉     | 2413/4850 [11:01<11:04,  3.67it/s]

Epoch: 1, Loss: 3.6388070583343506


Processing epoch 00:  50%|████▉     | 2414/4850 [11:01<11:03,  3.67it/s]

Epoch: 1, Loss: 3.14371919631958


Processing epoch 00:  50%|████▉     | 2415/4850 [11:01<11:02,  3.67it/s]

Epoch: 1, Loss: 3.8358802795410156


Processing epoch 00:  50%|████▉     | 2416/4850 [11:02<11:01,  3.68it/s]

Epoch: 1, Loss: 3.7451882362365723


Processing epoch 00:  50%|████▉     | 2417/4850 [11:02<11:01,  3.68it/s]

Epoch: 1, Loss: 3.272905111312866


Processing epoch 00:  50%|████▉     | 2418/4850 [11:02<11:02,  3.67it/s]

Epoch: 1, Loss: 3.396287679672241


Processing epoch 00:  50%|████▉     | 2419/4850 [11:02<11:05,  3.65it/s]

Epoch: 1, Loss: 4.281566619873047


Processing epoch 00:  50%|████▉     | 2420/4850 [11:03<11:04,  3.66it/s]

Epoch: 1, Loss: 3.269402265548706


Processing epoch 00:  50%|████▉     | 2421/4850 [11:03<11:03,  3.66it/s]

Epoch: 1, Loss: 3.469662666320801


Processing epoch 00:  50%|████▉     | 2422/4850 [11:03<11:00,  3.68it/s]

Epoch: 1, Loss: 3.4096601009368896


Processing epoch 00:  50%|████▉     | 2423/4850 [11:04<10:58,  3.68it/s]

Epoch: 1, Loss: 3.3521103858947754


Processing epoch 00:  50%|████▉     | 2424/4850 [11:04<11:14,  3.60it/s]

Epoch: 1, Loss: 3.864800453186035


Processing epoch 00:  50%|█████     | 2425/4850 [11:04<11:22,  3.55it/s]

Epoch: 1, Loss: 4.007658958435059


Processing epoch 00:  50%|█████     | 2426/4850 [11:04<11:32,  3.50it/s]

Epoch: 1, Loss: 3.282417058944702


Processing epoch 00:  50%|█████     | 2427/4850 [11:05<11:40,  3.46it/s]

Epoch: 1, Loss: 3.8993921279907227


Processing epoch 00:  50%|█████     | 2428/4850 [11:05<11:33,  3.49it/s]

Epoch: 1, Loss: 3.2567906379699707


Processing epoch 00:  50%|█████     | 2429/4850 [11:05<11:29,  3.51it/s]

Epoch: 1, Loss: 3.852717876434326


Processing epoch 00:  50%|█████     | 2430/4850 [11:06<11:43,  3.44it/s]

Epoch: 1, Loss: 2.9747939109802246


Processing epoch 00:  50%|█████     | 2431/4850 [11:06<11:45,  3.43it/s]

Epoch: 1, Loss: 4.543245792388916


Processing epoch 00:  50%|█████     | 2432/4850 [11:06<11:46,  3.42it/s]

Epoch: 1, Loss: 3.9295425415039062


Processing epoch 00:  50%|█████     | 2433/4850 [11:06<11:56,  3.37it/s]

Epoch: 1, Loss: 3.7696175575256348


Processing epoch 00:  50%|█████     | 2434/4850 [11:07<11:53,  3.39it/s]

Epoch: 1, Loss: 4.193458080291748


Processing epoch 00:  50%|█████     | 2435/4850 [11:07<11:37,  3.46it/s]

Epoch: 1, Loss: 3.720089912414551


Processing epoch 00:  50%|█████     | 2436/4850 [11:07<11:26,  3.52it/s]

Epoch: 1, Loss: 3.2797412872314453


Processing epoch 00:  50%|█████     | 2437/4850 [11:08<11:16,  3.57it/s]

Epoch: 1, Loss: 2.622662305831909


Processing epoch 00:  50%|█████     | 2438/4850 [11:08<11:09,  3.60it/s]

Epoch: 1, Loss: 4.199838638305664


Processing epoch 00:  50%|█████     | 2439/4850 [11:08<11:04,  3.63it/s]

Epoch: 1, Loss: 3.4322915077209473


Processing epoch 00:  50%|█████     | 2440/4850 [11:08<11:05,  3.62it/s]

Epoch: 1, Loss: 4.257790565490723


Processing epoch 00:  50%|█████     | 2441/4850 [11:09<11:02,  3.64it/s]

Epoch: 1, Loss: 4.179677486419678


Processing epoch 00:  50%|█████     | 2442/4850 [11:09<10:59,  3.65it/s]

Epoch: 1, Loss: 4.095778942108154


Processing epoch 00:  50%|█████     | 2443/4850 [11:09<10:55,  3.67it/s]

Epoch: 1, Loss: 4.7245025634765625


Processing epoch 00:  50%|█████     | 2444/4850 [11:09<10:55,  3.67it/s]

Epoch: 1, Loss: 3.5926413536071777


Processing epoch 00:  50%|█████     | 2445/4850 [11:10<10:54,  3.68it/s]

Epoch: 1, Loss: 4.409735202789307


Processing epoch 00:  50%|█████     | 2446/4850 [11:10<11:04,  3.62it/s]

Epoch: 1, Loss: 4.297628879547119


Processing epoch 00:  50%|█████     | 2447/4850 [11:10<11:02,  3.62it/s]

Epoch: 1, Loss: 3.482328414916992


Processing epoch 00:  50%|█████     | 2448/4850 [11:11<11:01,  3.63it/s]

Epoch: 1, Loss: 3.6617531776428223


Processing epoch 00:  50%|█████     | 2449/4850 [11:11<10:59,  3.64it/s]

Epoch: 1, Loss: 3.457906723022461


Processing epoch 00:  51%|█████     | 2450/4850 [11:11<10:57,  3.65it/s]

Epoch: 1, Loss: 4.010927200317383


Processing epoch 00:  51%|█████     | 2451/4850 [11:11<10:56,  3.66it/s]

Epoch: 1, Loss: 3.876826047897339


Processing epoch 00:  51%|█████     | 2452/4850 [11:12<10:56,  3.65it/s]

Epoch: 1, Loss: 2.9827589988708496


Processing epoch 00:  51%|█████     | 2453/4850 [11:12<10:54,  3.66it/s]

Epoch: 1, Loss: 3.249251365661621


Processing epoch 00:  51%|█████     | 2454/4850 [11:12<10:54,  3.66it/s]

Epoch: 1, Loss: 3.8654403686523438


Processing epoch 00:  51%|█████     | 2455/4850 [11:12<10:52,  3.67it/s]

Epoch: 1, Loss: 3.9131641387939453


Processing epoch 00:  51%|█████     | 2456/4850 [11:13<10:50,  3.68it/s]

Epoch: 1, Loss: 4.27707576751709


Processing epoch 00:  51%|█████     | 2457/4850 [11:13<10:57,  3.64it/s]

Epoch: 1, Loss: 3.7759714126586914


Processing epoch 00:  51%|█████     | 2458/4850 [11:13<10:55,  3.65it/s]

Epoch: 1, Loss: 2.877152442932129


Processing epoch 00:  51%|█████     | 2459/4850 [11:14<10:51,  3.67it/s]

Epoch: 1, Loss: 4.154925346374512


Processing epoch 00:  51%|█████     | 2460/4850 [11:14<10:52,  3.66it/s]

Epoch: 1, Loss: 3.387444257736206


Processing epoch 00:  51%|█████     | 2461/4850 [11:14<10:53,  3.66it/s]

Epoch: 1, Loss: 3.1171112060546875


Processing epoch 00:  51%|█████     | 2462/4850 [11:14<10:53,  3.65it/s]

Epoch: 1, Loss: 3.861445665359497


Processing epoch 00:  51%|█████     | 2463/4850 [11:15<10:52,  3.66it/s]

Epoch: 1, Loss: 3.9726128578186035


Processing epoch 00:  51%|█████     | 2464/4850 [11:15<10:50,  3.67it/s]

Epoch: 1, Loss: 4.443832874298096


Processing epoch 00:  51%|█████     | 2465/4850 [11:15<10:47,  3.68it/s]

Epoch: 1, Loss: 4.419483661651611


Processing epoch 00:  51%|█████     | 2466/4850 [11:15<10:50,  3.66it/s]

Epoch: 1, Loss: 3.88600492477417


Processing epoch 00:  51%|█████     | 2467/4850 [11:16<10:50,  3.66it/s]

Epoch: 1, Loss: 3.6865363121032715


Processing epoch 00:  51%|█████     | 2468/4850 [11:16<10:51,  3.66it/s]

Epoch: 1, Loss: 3.4454715251922607


Processing epoch 00:  51%|█████     | 2469/4850 [11:16<10:50,  3.66it/s]

Epoch: 1, Loss: 3.554762601852417


Processing epoch 00:  51%|█████     | 2470/4850 [11:17<10:49,  3.66it/s]

Epoch: 1, Loss: 4.215522766113281


Processing epoch 00:  51%|█████     | 2471/4850 [11:17<10:59,  3.61it/s]

Epoch: 1, Loss: 3.2340550422668457


Processing epoch 00:  51%|█████     | 2472/4850 [11:17<11:23,  3.48it/s]

Epoch: 1, Loss: 3.195276975631714


Processing epoch 00:  51%|█████     | 2473/4850 [11:17<11:29,  3.45it/s]

Epoch: 1, Loss: 3.977992057800293


Processing epoch 00:  51%|█████     | 2474/4850 [11:18<11:23,  3.48it/s]

Epoch: 1, Loss: 3.6218864917755127


Processing epoch 00:  51%|█████     | 2475/4850 [11:18<11:26,  3.46it/s]

Epoch: 1, Loss: 3.9262688159942627


Processing epoch 00:  51%|█████     | 2476/4850 [11:18<11:29,  3.45it/s]

Epoch: 1, Loss: 3.0005135536193848


Processing epoch 00:  51%|█████     | 2477/4850 [11:19<11:29,  3.44it/s]

Epoch: 1, Loss: 3.1790738105773926


Processing epoch 00:  51%|█████     | 2478/4850 [11:19<11:27,  3.45it/s]

Epoch: 1, Loss: 3.4622013568878174


Processing epoch 00:  51%|█████     | 2479/4850 [11:19<11:28,  3.44it/s]

Epoch: 1, Loss: 3.7976438999176025


Processing epoch 00:  51%|█████     | 2480/4850 [11:20<11:30,  3.43it/s]

Epoch: 1, Loss: 2.8822436332702637


Processing epoch 00:  51%|█████     | 2481/4850 [11:20<11:43,  3.37it/s]

Epoch: 1, Loss: 3.1556396484375


Processing epoch 00:  51%|█████     | 2482/4850 [11:20<11:26,  3.45it/s]

Epoch: 1, Loss: 3.889378786087036


Processing epoch 00:  51%|█████     | 2483/4850 [11:20<11:22,  3.47it/s]

Epoch: 1, Loss: 3.4770302772521973


Processing epoch 00:  51%|█████     | 2484/4850 [11:21<11:11,  3.52it/s]

Epoch: 1, Loss: 3.9326634407043457


Processing epoch 00:  51%|█████     | 2485/4850 [11:21<11:02,  3.57it/s]

Epoch: 1, Loss: 3.7149293422698975


Processing epoch 00:  51%|█████▏    | 2486/4850 [11:21<10:57,  3.60it/s]

Epoch: 1, Loss: 4.161375999450684


Processing epoch 00:  51%|█████▏    | 2487/4850 [11:21<10:55,  3.60it/s]

Epoch: 1, Loss: 3.417668342590332


Processing epoch 00:  51%|█████▏    | 2488/4850 [11:22<10:52,  3.62it/s]

Epoch: 1, Loss: 3.4342596530914307


Processing epoch 00:  51%|█████▏    | 2489/4850 [11:22<10:49,  3.64it/s]

Epoch: 1, Loss: 3.256608009338379


Processing epoch 00:  51%|█████▏    | 2490/4850 [11:22<10:49,  3.63it/s]

Epoch: 1, Loss: 3.089590311050415


Processing epoch 00:  51%|█████▏    | 2491/4850 [11:23<10:45,  3.65it/s]

Epoch: 1, Loss: 4.143383979797363


Processing epoch 00:  51%|█████▏    | 2492/4850 [11:23<10:45,  3.65it/s]

Epoch: 1, Loss: 4.160442352294922


Processing epoch 00:  51%|█████▏    | 2493/4850 [11:23<10:44,  3.66it/s]

Epoch: 1, Loss: 3.4832520484924316


Processing epoch 00:  51%|█████▏    | 2494/4850 [11:23<10:45,  3.65it/s]

Epoch: 1, Loss: 3.492363929748535


Processing epoch 00:  51%|█████▏    | 2495/4850 [11:24<10:41,  3.67it/s]

Epoch: 1, Loss: 5.397270202636719


Processing epoch 00:  51%|█████▏    | 2496/4850 [11:24<10:41,  3.67it/s]

Epoch: 1, Loss: 3.640104293823242


Processing epoch 00:  51%|█████▏    | 2497/4850 [11:24<10:37,  3.69it/s]

Epoch: 1, Loss: 3.9598679542541504


Processing epoch 00:  52%|█████▏    | 2498/4850 [11:24<10:42,  3.66it/s]

Epoch: 1, Loss: 3.993469715118408


Processing epoch 00:  52%|█████▏    | 2499/4850 [11:25<10:40,  3.67it/s]

Epoch: 1, Loss: 3.8087711334228516


Processing epoch 00:  52%|█████▏    | 2500/4850 [11:25<10:37,  3.68it/s]

Epoch: 1, Loss: 4.263988494873047


Processing epoch 00:  52%|█████▏    | 2501/4850 [11:25<10:39,  3.67it/s]

Epoch: 1, Loss: 3.7610509395599365


Processing epoch 00:  52%|█████▏    | 2502/4850 [11:26<10:40,  3.67it/s]

Epoch: 1, Loss: 4.295510292053223


Processing epoch 00:  52%|█████▏    | 2503/4850 [11:26<10:42,  3.65it/s]

Epoch: 1, Loss: 4.099940299987793


Processing epoch 00:  52%|█████▏    | 2504/4850 [11:26<10:39,  3.67it/s]

Epoch: 1, Loss: 3.410247802734375


Processing epoch 00:  52%|█████▏    | 2505/4850 [11:26<10:38,  3.67it/s]

Epoch: 1, Loss: 3.736936092376709


Processing epoch 00:  52%|█████▏    | 2506/4850 [11:27<10:35,  3.69it/s]

Epoch: 1, Loss: 5.028566360473633


Processing epoch 00:  52%|█████▏    | 2507/4850 [11:27<10:39,  3.66it/s]

Epoch: 1, Loss: 3.1175765991210938


Processing epoch 00:  52%|█████▏    | 2508/4850 [11:27<10:40,  3.66it/s]

Epoch: 1, Loss: 3.3797271251678467


Processing epoch 00:  52%|█████▏    | 2509/4850 [11:27<10:39,  3.66it/s]

Epoch: 1, Loss: 3.890713691711426


Processing epoch 00:  52%|█████▏    | 2510/4850 [11:28<10:39,  3.66it/s]

Epoch: 1, Loss: 4.1653971672058105


Processing epoch 00:  52%|█████▏    | 2511/4850 [11:28<10:41,  3.64it/s]

Epoch: 1, Loss: 3.8823294639587402


Processing epoch 00:  52%|█████▏    | 2512/4850 [11:28<10:42,  3.64it/s]

Epoch: 1, Loss: 3.1557769775390625


Processing epoch 00:  52%|█████▏    | 2513/4850 [11:29<10:47,  3.61it/s]

Epoch: 1, Loss: 3.596518039703369


Processing epoch 00:  52%|█████▏    | 2514/4850 [11:29<10:42,  3.63it/s]

Epoch: 1, Loss: 4.1569061279296875


Processing epoch 00:  52%|█████▏    | 2515/4850 [11:29<10:39,  3.65it/s]

Epoch: 1, Loss: 4.119253635406494


Processing epoch 00:  52%|█████▏    | 2516/4850 [11:29<10:40,  3.64it/s]

Epoch: 1, Loss: 3.9138197898864746


Processing epoch 00:  52%|█████▏    | 2517/4850 [11:30<10:43,  3.63it/s]

Epoch: 1, Loss: 3.155083417892456


Processing epoch 00:  52%|█████▏    | 2518/4850 [11:30<10:54,  3.56it/s]

Epoch: 1, Loss: 3.6852123737335205


Processing epoch 00:  52%|█████▏    | 2519/4850 [11:30<10:57,  3.54it/s]

Epoch: 1, Loss: 3.362698554992676


Processing epoch 00:  52%|█████▏    | 2520/4850 [11:31<11:09,  3.48it/s]

Epoch: 1, Loss: 3.3671257495880127


Processing epoch 00:  52%|█████▏    | 2521/4850 [11:31<11:05,  3.50it/s]

Epoch: 1, Loss: 4.07094144821167


Processing epoch 00:  52%|█████▏    | 2522/4850 [11:31<10:57,  3.54it/s]

Epoch: 1, Loss: 4.610234260559082


Processing epoch 00:  52%|█████▏    | 2523/4850 [11:31<11:08,  3.48it/s]

Epoch: 1, Loss: 3.539780855178833


Processing epoch 00:  52%|█████▏    | 2524/4850 [11:32<11:19,  3.42it/s]

Epoch: 1, Loss: 3.839325189590454


Processing epoch 00:  52%|█████▏    | 2525/4850 [11:32<11:22,  3.41it/s]

Epoch: 1, Loss: 3.8136868476867676


Processing epoch 00:  52%|█████▏    | 2526/4850 [11:32<11:35,  3.34it/s]

Epoch: 1, Loss: 3.477565288543701


Processing epoch 00:  52%|█████▏    | 2527/4850 [11:33<11:23,  3.40it/s]

Epoch: 1, Loss: 3.950359344482422


Processing epoch 00:  52%|█████▏    | 2528/4850 [11:33<11:16,  3.43it/s]

Epoch: 1, Loss: 4.576683044433594


Processing epoch 00:  52%|█████▏    | 2529/4850 [11:33<11:04,  3.49it/s]

Epoch: 1, Loss: 2.991616725921631


Processing epoch 00:  52%|█████▏    | 2530/4850 [11:33<10:53,  3.55it/s]

Epoch: 1, Loss: 3.454683780670166


Processing epoch 00:  52%|█████▏    | 2531/4850 [11:34<10:52,  3.55it/s]

Epoch: 1, Loss: 3.385493755340576


Processing epoch 00:  52%|█████▏    | 2532/4850 [11:34<10:43,  3.60it/s]

Epoch: 1, Loss: 4.475428104400635


Processing epoch 00:  52%|█████▏    | 2533/4850 [11:34<10:38,  3.63it/s]

Epoch: 1, Loss: 3.63449764251709


Processing epoch 00:  52%|█████▏    | 2534/4850 [11:35<10:37,  3.64it/s]

Epoch: 1, Loss: 3.6995716094970703


Processing epoch 00:  52%|█████▏    | 2535/4850 [11:35<10:37,  3.63it/s]

Epoch: 1, Loss: 4.009909629821777


Processing epoch 00:  52%|█████▏    | 2536/4850 [11:35<10:35,  3.64it/s]

Epoch: 1, Loss: 4.117167949676514


Processing epoch 00:  52%|█████▏    | 2537/4850 [11:35<10:36,  3.63it/s]

Epoch: 1, Loss: 3.6224546432495117


Processing epoch 00:  52%|█████▏    | 2538/4850 [11:36<10:33,  3.65it/s]

Epoch: 1, Loss: 4.190451145172119


Processing epoch 00:  52%|█████▏    | 2539/4850 [11:36<10:30,  3.67it/s]

Epoch: 1, Loss: 3.322445869445801


Processing epoch 00:  52%|█████▏    | 2540/4850 [11:36<10:29,  3.67it/s]

Epoch: 1, Loss: 4.080512523651123


Processing epoch 00:  52%|█████▏    | 2541/4850 [11:36<10:28,  3.68it/s]

Epoch: 1, Loss: 3.7526328563690186


Processing epoch 00:  52%|█████▏    | 2542/4850 [11:37<10:26,  3.68it/s]

Epoch: 1, Loss: 4.656868934631348


Processing epoch 00:  52%|█████▏    | 2543/4850 [11:37<10:26,  3.68it/s]

Epoch: 1, Loss: 3.1125473976135254


Processing epoch 00:  52%|█████▏    | 2544/4850 [11:37<10:23,  3.70it/s]

Epoch: 1, Loss: 3.3390204906463623


Processing epoch 00:  52%|█████▏    | 2545/4850 [11:38<10:23,  3.70it/s]

Epoch: 1, Loss: 3.596752643585205


Processing epoch 00:  52%|█████▏    | 2546/4850 [11:38<10:26,  3.68it/s]

Epoch: 1, Loss: 2.975884437561035


Processing epoch 00:  53%|█████▎    | 2547/4850 [11:38<10:25,  3.68it/s]

Epoch: 1, Loss: 3.5293993949890137


Processing epoch 00:  53%|█████▎    | 2548/4850 [11:38<10:25,  3.68it/s]

Epoch: 1, Loss: 3.7450456619262695


Processing epoch 00:  53%|█████▎    | 2549/4850 [11:39<10:26,  3.67it/s]

Epoch: 1, Loss: 3.9971096515655518


Processing epoch 00:  53%|█████▎    | 2550/4850 [11:39<10:32,  3.64it/s]

Epoch: 1, Loss: 3.3097686767578125


Processing epoch 00:  53%|█████▎    | 2551/4850 [11:39<10:27,  3.66it/s]

Epoch: 1, Loss: 4.195241928100586


Processing epoch 00:  53%|█████▎    | 2552/4850 [11:39<10:28,  3.66it/s]

Epoch: 1, Loss: 4.2696123123168945


Processing epoch 00:  53%|█████▎    | 2553/4850 [11:40<10:25,  3.67it/s]

Epoch: 1, Loss: 3.5717220306396484


Processing epoch 00:  53%|█████▎    | 2554/4850 [11:40<10:27,  3.66it/s]

Epoch: 1, Loss: 3.022442579269409


Processing epoch 00:  53%|█████▎    | 2555/4850 [11:40<10:26,  3.66it/s]

Epoch: 1, Loss: 3.253610610961914


Processing epoch 00:  53%|█████▎    | 2556/4850 [11:41<10:26,  3.66it/s]

Epoch: 1, Loss: 3.7552523612976074


Processing epoch 00:  53%|█████▎    | 2557/4850 [11:41<10:25,  3.66it/s]

Epoch: 1, Loss: 3.3524155616760254


Processing epoch 00:  53%|█████▎    | 2558/4850 [11:41<10:27,  3.65it/s]

Epoch: 1, Loss: 4.42113733291626


Processing epoch 00:  53%|█████▎    | 2559/4850 [11:41<10:34,  3.61it/s]

Epoch: 1, Loss: 3.386829137802124


Processing epoch 00:  53%|█████▎    | 2560/4850 [11:42<10:31,  3.63it/s]

Epoch: 1, Loss: 4.133274078369141


Processing epoch 00:  53%|█████▎    | 2561/4850 [11:42<10:29,  3.64it/s]

Epoch: 1, Loss: 3.1953697204589844


Processing epoch 00:  53%|█████▎    | 2562/4850 [11:42<10:26,  3.65it/s]

Epoch: 1, Loss: 4.317996025085449


Processing epoch 00:  53%|█████▎    | 2563/4850 [11:42<10:25,  3.66it/s]

Epoch: 1, Loss: 4.105887413024902


Processing epoch 00:  53%|█████▎    | 2564/4850 [11:43<10:25,  3.65it/s]

Epoch: 1, Loss: 3.258648633956909


Processing epoch 00:  53%|█████▎    | 2565/4850 [11:43<10:35,  3.59it/s]

Epoch: 1, Loss: 4.150055885314941


Processing epoch 00:  53%|█████▎    | 2566/4850 [11:43<10:42,  3.56it/s]

Epoch: 1, Loss: 4.275711536407471


Processing epoch 00:  53%|█████▎    | 2567/4850 [11:44<10:49,  3.51it/s]

Epoch: 1, Loss: 3.7266931533813477


Processing epoch 00:  53%|█████▎    | 2568/4850 [11:44<10:50,  3.51it/s]

Epoch: 1, Loss: 2.9605236053466797


Processing epoch 00:  53%|█████▎    | 2569/4850 [11:44<10:51,  3.50it/s]

Epoch: 1, Loss: 4.844143867492676


Processing epoch 00:  53%|█████▎    | 2570/4850 [11:44<10:52,  3.50it/s]

Epoch: 1, Loss: 3.820403575897217


Processing epoch 00:  53%|█████▎    | 2571/4850 [11:45<10:59,  3.46it/s]

Epoch: 1, Loss: 3.5240845680236816


Processing epoch 00:  53%|█████▎    | 2572/4850 [11:45<10:55,  3.47it/s]

Epoch: 1, Loss: 3.6647822856903076


Processing epoch 00:  53%|█████▎    | 2573/4850 [11:45<10:56,  3.47it/s]

Epoch: 1, Loss: 3.3705008029937744


Processing epoch 00:  53%|█████▎    | 2574/4850 [11:46<10:50,  3.50it/s]

Epoch: 1, Loss: 4.806589603424072


Processing epoch 00:  53%|█████▎    | 2575/4850 [11:46<10:54,  3.48it/s]

Epoch: 1, Loss: 4.728036403656006


Processing epoch 00:  53%|█████▎    | 2576/4850 [11:46<11:02,  3.43it/s]

Epoch: 1, Loss: 3.213395595550537


Processing epoch 00:  53%|█████▎    | 2577/4850 [11:46<10:49,  3.50it/s]

Epoch: 1, Loss: 3.664579391479492


Processing epoch 00:  53%|█████▎    | 2578/4850 [11:47<10:41,  3.54it/s]

Epoch: 1, Loss: 3.6370432376861572


Processing epoch 00:  53%|█████▎    | 2579/4850 [11:47<10:36,  3.57it/s]

Epoch: 1, Loss: 3.182401657104492


Processing epoch 00:  53%|█████▎    | 2580/4850 [11:47<10:30,  3.60it/s]

Epoch: 1, Loss: 4.119782447814941


Processing epoch 00:  53%|█████▎    | 2581/4850 [11:48<10:27,  3.62it/s]

Epoch: 1, Loss: 3.16265606880188


Processing epoch 00:  53%|█████▎    | 2582/4850 [11:48<10:25,  3.63it/s]

Epoch: 1, Loss: 3.981522560119629


Processing epoch 00:  53%|█████▎    | 2583/4850 [11:48<10:23,  3.64it/s]

Epoch: 1, Loss: 3.5137624740600586


Processing epoch 00:  53%|█████▎    | 2584/4850 [11:48<10:19,  3.66it/s]

Epoch: 1, Loss: 3.539574146270752


Processing epoch 00:  53%|█████▎    | 2585/4850 [11:49<10:20,  3.65it/s]

Epoch: 1, Loss: 3.1702780723571777


Processing epoch 00:  53%|█████▎    | 2586/4850 [11:49<10:19,  3.66it/s]

Epoch: 1, Loss: 2.877505302429199


Processing epoch 00:  53%|█████▎    | 2587/4850 [11:49<10:24,  3.63it/s]

Epoch: 1, Loss: 3.4560458660125732


Processing epoch 00:  53%|█████▎    | 2588/4850 [11:49<10:22,  3.64it/s]

Epoch: 1, Loss: 3.6283040046691895


Processing epoch 00:  53%|█████▎    | 2589/4850 [11:50<10:19,  3.65it/s]

Epoch: 1, Loss: 4.200360298156738


Processing epoch 00:  53%|█████▎    | 2590/4850 [11:50<10:19,  3.65it/s]

Epoch: 1, Loss: 3.4495315551757812


Processing epoch 00:  53%|█████▎    | 2591/4850 [11:50<10:19,  3.65it/s]

Epoch: 1, Loss: 4.35247802734375


Processing epoch 00:  53%|█████▎    | 2592/4850 [11:51<10:18,  3.65it/s]

Epoch: 1, Loss: 3.6595916748046875


Processing epoch 00:  53%|█████▎    | 2593/4850 [11:51<10:17,  3.65it/s]

Epoch: 1, Loss: 3.6517515182495117


Processing epoch 00:  53%|█████▎    | 2594/4850 [11:51<10:17,  3.65it/s]

Epoch: 1, Loss: 3.634847640991211


Processing epoch 00:  54%|█████▎    | 2595/4850 [11:51<10:15,  3.66it/s]

Epoch: 1, Loss: 3.217900037765503


Processing epoch 00:  54%|█████▎    | 2596/4850 [11:52<10:16,  3.66it/s]

Epoch: 1, Loss: 3.0363759994506836


Processing epoch 00:  54%|█████▎    | 2597/4850 [11:52<10:14,  3.67it/s]

Epoch: 1, Loss: 3.7435927391052246


Processing epoch 00:  54%|█████▎    | 2598/4850 [11:52<10:14,  3.67it/s]

Epoch: 1, Loss: 4.906149864196777


Processing epoch 00:  54%|█████▎    | 2599/4850 [11:52<10:13,  3.67it/s]

Epoch: 1, Loss: 3.639054536819458


Processing epoch 00:  54%|█████▎    | 2600/4850 [11:53<10:21,  3.62it/s]

Epoch: 1, Loss: 4.157704830169678


Processing epoch 00:  54%|█████▎    | 2601/4850 [11:53<10:21,  3.62it/s]

Epoch: 1, Loss: 3.4313852787017822


Processing epoch 00:  54%|█████▎    | 2602/4850 [11:53<10:23,  3.61it/s]

Epoch: 1, Loss: 3.3822546005249023


Processing epoch 00:  54%|█████▎    | 2603/4850 [11:54<10:20,  3.62it/s]

Epoch: 1, Loss: 3.589874505996704


Processing epoch 00:  54%|█████▎    | 2604/4850 [11:54<10:19,  3.62it/s]

Epoch: 1, Loss: 2.7708733081817627


Processing epoch 00:  54%|█████▎    | 2605/4850 [11:54<10:16,  3.64it/s]

Epoch: 1, Loss: 3.6133511066436768


Processing epoch 00:  54%|█████▎    | 2606/4850 [11:54<10:15,  3.65it/s]

Epoch: 1, Loss: 4.02012300491333


Processing epoch 00:  54%|█████▍    | 2607/4850 [11:55<10:14,  3.65it/s]

Epoch: 1, Loss: 3.3236207962036133


Processing epoch 00:  54%|█████▍    | 2608/4850 [11:55<10:14,  3.65it/s]

Epoch: 1, Loss: 3.379452705383301


Processing epoch 00:  54%|█████▍    | 2609/4850 [11:55<10:12,  3.66it/s]

Epoch: 1, Loss: 3.524409294128418


Processing epoch 00:  54%|█████▍    | 2610/4850 [11:56<10:11,  3.66it/s]

Epoch: 1, Loss: 3.096691608428955


Processing epoch 00:  54%|█████▍    | 2611/4850 [11:56<10:11,  3.66it/s]

Epoch: 1, Loss: 3.5288853645324707


Processing epoch 00:  54%|█████▍    | 2612/4850 [11:56<10:09,  3.67it/s]

Epoch: 1, Loss: 3.6975526809692383


Processing epoch 00:  54%|█████▍    | 2613/4850 [11:56<10:27,  3.57it/s]

Epoch: 1, Loss: 2.9074149131774902


Processing epoch 00:  54%|█████▍    | 2614/4850 [11:57<10:31,  3.54it/s]

Epoch: 1, Loss: 3.025073528289795


Processing epoch 00:  54%|█████▍    | 2615/4850 [11:57<10:40,  3.49it/s]

Epoch: 1, Loss: 3.7975709438323975


Processing epoch 00:  54%|█████▍    | 2616/4850 [11:57<10:41,  3.48it/s]

Epoch: 1, Loss: 3.4031858444213867


Processing epoch 00:  54%|█████▍    | 2617/4850 [11:58<10:47,  3.45it/s]

Epoch: 1, Loss: 2.8650848865509033


Processing epoch 00:  54%|█████▍    | 2618/4850 [11:58<10:48,  3.44it/s]

Epoch: 1, Loss: 3.5820395946502686


Processing epoch 00:  54%|█████▍    | 2619/4850 [11:58<10:58,  3.39it/s]

Epoch: 1, Loss: 4.071303844451904


Processing epoch 00:  54%|█████▍    | 2620/4850 [11:58<10:57,  3.39it/s]

Epoch: 1, Loss: 4.120696067810059


Processing epoch 00:  54%|█████▍    | 2621/4850 [11:59<10:59,  3.38it/s]

Epoch: 1, Loss: 3.2746124267578125


Processing epoch 00:  54%|█████▍    | 2622/4850 [11:59<10:54,  3.40it/s]

Epoch: 1, Loss: 3.830592393875122


Processing epoch 00:  54%|█████▍    | 2623/4850 [11:59<10:41,  3.47it/s]

Epoch: 1, Loss: 4.053164958953857


Processing epoch 00:  54%|█████▍    | 2624/4850 [12:00<10:36,  3.50it/s]

Epoch: 1, Loss: 3.52345609664917


Processing epoch 00:  54%|█████▍    | 2625/4850 [12:00<10:27,  3.54it/s]

Epoch: 1, Loss: 3.209693670272827


Processing epoch 00:  54%|█████▍    | 2626/4850 [12:00<10:19,  3.59it/s]

Epoch: 1, Loss: 4.342864036560059


Processing epoch 00:  54%|█████▍    | 2627/4850 [12:00<10:16,  3.61it/s]

Epoch: 1, Loss: 3.5067131519317627


Processing epoch 00:  54%|█████▍    | 2628/4850 [12:01<10:11,  3.63it/s]

Epoch: 1, Loss: 4.015523910522461


Processing epoch 00:  54%|█████▍    | 2629/4850 [12:01<10:07,  3.65it/s]

Epoch: 1, Loss: 5.274197101593018


Processing epoch 00:  54%|█████▍    | 2630/4850 [12:01<10:04,  3.67it/s]

Epoch: 1, Loss: 4.298559665679932


Processing epoch 00:  54%|█████▍    | 2631/4850 [12:01<10:03,  3.68it/s]

Epoch: 1, Loss: 3.998791456222534


Processing epoch 00:  54%|█████▍    | 2632/4850 [12:02<10:03,  3.68it/s]

Epoch: 1, Loss: 3.430023670196533


Processing epoch 00:  54%|█████▍    | 2633/4850 [12:02<10:02,  3.68it/s]

Epoch: 1, Loss: 3.309497356414795


Processing epoch 00:  54%|█████▍    | 2634/4850 [12:02<10:11,  3.62it/s]

Epoch: 1, Loss: 3.231550693511963


Processing epoch 00:  54%|█████▍    | 2635/4850 [12:03<10:09,  3.64it/s]

Epoch: 1, Loss: 3.5481362342834473


Processing epoch 00:  54%|█████▍    | 2636/4850 [12:03<10:07,  3.64it/s]

Epoch: 1, Loss: 3.460909366607666


Processing epoch 00:  54%|█████▍    | 2637/4850 [12:03<10:06,  3.65it/s]

Epoch: 1, Loss: 2.867626667022705


Processing epoch 00:  54%|█████▍    | 2638/4850 [12:03<10:03,  3.66it/s]

Epoch: 1, Loss: 3.381354570388794


Processing epoch 00:  54%|█████▍    | 2639/4850 [12:04<10:10,  3.62it/s]

Epoch: 1, Loss: 3.472775459289551


Processing epoch 00:  54%|█████▍    | 2640/4850 [12:04<10:10,  3.62it/s]

Epoch: 1, Loss: 2.796182632446289


Processing epoch 00:  54%|█████▍    | 2641/4850 [12:04<10:08,  3.63it/s]

Epoch: 1, Loss: 3.922677993774414


Processing epoch 00:  54%|█████▍    | 2642/4850 [12:04<10:06,  3.64it/s]

Epoch: 1, Loss: 3.4851717948913574


Processing epoch 00:  54%|█████▍    | 2643/4850 [12:05<10:05,  3.65it/s]

Epoch: 1, Loss: 3.8963570594787598


Processing epoch 00:  55%|█████▍    | 2644/4850 [12:05<10:02,  3.66it/s]

Epoch: 1, Loss: 3.9419946670532227


Processing epoch 00:  55%|█████▍    | 2645/4850 [12:05<10:00,  3.67it/s]

Epoch: 1, Loss: 4.116549491882324


Processing epoch 00:  55%|█████▍    | 2646/4850 [12:06<10:02,  3.66it/s]

Epoch: 1, Loss: 3.811256170272827


Processing epoch 00:  55%|█████▍    | 2647/4850 [12:06<10:05,  3.64it/s]

Epoch: 1, Loss: 2.8560001850128174


Processing epoch 00:  55%|█████▍    | 2648/4850 [12:06<10:01,  3.66it/s]

Epoch: 1, Loss: 4.298994541168213


Processing epoch 00:  55%|█████▍    | 2649/4850 [12:06<10:04,  3.64it/s]

Epoch: 1, Loss: 3.214724063873291


Processing epoch 00:  55%|█████▍    | 2650/4850 [12:07<10:03,  3.64it/s]

Epoch: 1, Loss: 3.0680079460144043


Processing epoch 00:  55%|█████▍    | 2651/4850 [12:07<10:03,  3.64it/s]

Epoch: 1, Loss: 3.6147677898406982


Processing epoch 00:  55%|█████▍    | 2652/4850 [12:07<10:02,  3.65it/s]

Epoch: 1, Loss: 4.3093695640563965


Processing epoch 00:  55%|█████▍    | 2653/4850 [12:07<10:01,  3.65it/s]

Epoch: 1, Loss: 3.2254223823547363


Processing epoch 00:  55%|█████▍    | 2654/4850 [12:08<10:03,  3.64it/s]

Epoch: 1, Loss: 3.9678597450256348


Processing epoch 00:  55%|█████▍    | 2655/4850 [12:08<10:03,  3.64it/s]

Epoch: 1, Loss: 3.6326422691345215


Processing epoch 00:  55%|█████▍    | 2656/4850 [12:08<09:57,  3.67it/s]

Epoch: 1, Loss: 4.516003608703613


Processing epoch 00:  55%|█████▍    | 2657/4850 [12:09<09:55,  3.68it/s]

Epoch: 1, Loss: 3.6442975997924805


Processing epoch 00:  55%|█████▍    | 2658/4850 [12:09<09:57,  3.67it/s]

Epoch: 1, Loss: 3.7567803859710693


Processing epoch 00:  55%|█████▍    | 2659/4850 [12:09<09:57,  3.67it/s]

Epoch: 1, Loss: 2.8844523429870605


Processing epoch 00:  55%|█████▍    | 2660/4850 [12:09<10:09,  3.59it/s]

Epoch: 1, Loss: 3.0990004539489746


Processing epoch 00:  55%|█████▍    | 2661/4850 [12:10<10:15,  3.56it/s]

Epoch: 1, Loss: 3.261035919189453


Processing epoch 00:  55%|█████▍    | 2662/4850 [12:10<10:26,  3.49it/s]

Epoch: 1, Loss: 4.336649417877197


Processing epoch 00:  55%|█████▍    | 2663/4850 [12:10<10:34,  3.45it/s]

Epoch: 1, Loss: 3.1600379943847656


Processing epoch 00:  55%|█████▍    | 2664/4850 [12:11<10:36,  3.44it/s]

Epoch: 1, Loss: 3.5487473011016846


Processing epoch 00:  55%|█████▍    | 2665/4850 [12:11<10:33,  3.45it/s]

Epoch: 1, Loss: 4.681213855743408


Processing epoch 00:  55%|█████▍    | 2666/4850 [12:11<10:31,  3.46it/s]

Epoch: 1, Loss: 3.8606104850769043


Processing epoch 00:  55%|█████▍    | 2667/4850 [12:11<10:33,  3.45it/s]

Epoch: 1, Loss: 3.6543641090393066


Processing epoch 00:  55%|█████▌    | 2668/4850 [12:12<10:30,  3.46it/s]

Epoch: 1, Loss: 3.607719659805298


Processing epoch 00:  55%|█████▌    | 2669/4850 [12:12<10:37,  3.42it/s]

Epoch: 1, Loss: 3.7433300018310547


Processing epoch 00:  55%|█████▌    | 2670/4850 [12:12<10:29,  3.46it/s]

Epoch: 1, Loss: 3.7191898822784424


Processing epoch 00:  55%|█████▌    | 2671/4850 [12:13<10:18,  3.52it/s]

Epoch: 1, Loss: 2.895132064819336


Processing epoch 00:  55%|█████▌    | 2672/4850 [12:13<10:11,  3.56it/s]

Epoch: 1, Loss: 3.532073736190796


Processing epoch 00:  55%|█████▌    | 2673/4850 [12:13<10:06,  3.59it/s]

Epoch: 1, Loss: 2.8365628719329834


Processing epoch 00:  55%|█████▌    | 2674/4850 [12:13<10:04,  3.60it/s]

Epoch: 1, Loss: 3.8637843132019043


Processing epoch 00:  55%|█████▌    | 2675/4850 [12:14<09:59,  3.63it/s]

Epoch: 1, Loss: 3.2672884464263916


Processing epoch 00:  55%|█████▌    | 2676/4850 [12:14<09:57,  3.64it/s]

Epoch: 1, Loss: 3.8846969604492188


Processing epoch 00:  55%|█████▌    | 2677/4850 [12:14<09:54,  3.65it/s]

Epoch: 1, Loss: 3.6601319313049316


Processing epoch 00:  55%|█████▌    | 2678/4850 [12:15<09:54,  3.65it/s]

Epoch: 1, Loss: 3.5944857597351074


Processing epoch 00:  55%|█████▌    | 2679/4850 [12:15<09:53,  3.66it/s]

Epoch: 1, Loss: 2.947927951812744


Processing epoch 00:  55%|█████▌    | 2680/4850 [12:15<10:01,  3.61it/s]

Epoch: 1, Loss: 4.133260726928711


Processing epoch 00:  55%|█████▌    | 2681/4850 [12:15<10:00,  3.61it/s]

Epoch: 1, Loss: 3.936234474182129


Processing epoch 00:  55%|█████▌    | 2682/4850 [12:16<10:00,  3.61it/s]

Epoch: 1, Loss: 2.961088180541992


Processing epoch 00:  55%|█████▌    | 2683/4850 [12:16<09:58,  3.62it/s]

Epoch: 1, Loss: 3.4433541297912598


Processing epoch 00:  55%|█████▌    | 2684/4850 [12:16<09:58,  3.62it/s]

Epoch: 1, Loss: 2.664314031600952


Processing epoch 00:  55%|█████▌    | 2685/4850 [12:16<09:56,  3.63it/s]

Epoch: 1, Loss: 3.643585205078125


Processing epoch 00:  55%|█████▌    | 2686/4850 [12:17<09:53,  3.64it/s]

Epoch: 1, Loss: 3.612761974334717


Processing epoch 00:  55%|█████▌    | 2687/4850 [12:17<09:52,  3.65it/s]

Epoch: 1, Loss: 3.8205113410949707


Processing epoch 00:  55%|█████▌    | 2688/4850 [12:17<09:52,  3.65it/s]

Epoch: 1, Loss: 3.7607762813568115


Processing epoch 00:  55%|█████▌    | 2689/4850 [12:18<09:49,  3.66it/s]

Epoch: 1, Loss: 4.088295936584473


Processing epoch 00:  55%|█████▌    | 2690/4850 [12:18<09:52,  3.64it/s]

Epoch: 1, Loss: 3.083063840866089


Processing epoch 00:  55%|█████▌    | 2691/4850 [12:18<09:52,  3.65it/s]

Epoch: 1, Loss: 3.1440470218658447


Processing epoch 00:  56%|█████▌    | 2692/4850 [12:18<09:50,  3.65it/s]

Epoch: 1, Loss: 3.740161180496216


Processing epoch 00:  56%|█████▌    | 2693/4850 [12:19<09:48,  3.67it/s]

Epoch: 1, Loss: 3.7486791610717773


Processing epoch 00:  56%|█████▌    | 2694/4850 [12:19<09:49,  3.66it/s]

Epoch: 1, Loss: 2.904480457305908


Processing epoch 00:  56%|█████▌    | 2695/4850 [12:19<09:51,  3.64it/s]

Epoch: 1, Loss: 3.1938228607177734


Processing epoch 00:  56%|█████▌    | 2696/4850 [12:19<09:48,  3.66it/s]

Epoch: 1, Loss: 4.822336196899414


Processing epoch 00:  56%|█████▌    | 2697/4850 [12:20<09:47,  3.67it/s]

Epoch: 1, Loss: 4.298872947692871


Processing epoch 00:  56%|█████▌    | 2698/4850 [12:20<09:50,  3.64it/s]

Epoch: 1, Loss: 3.2170393466949463


Processing epoch 00:  56%|█████▌    | 2699/4850 [12:20<09:50,  3.64it/s]

Epoch: 1, Loss: 3.6100809574127197


Processing epoch 00:  56%|█████▌    | 2700/4850 [12:21<09:48,  3.65it/s]

Epoch: 1, Loss: 4.205280303955078


Processing epoch 00:  56%|█████▌    | 2701/4850 [12:21<09:45,  3.67it/s]

Epoch: 1, Loss: 3.642096519470215


Processing epoch 00:  56%|█████▌    | 2702/4850 [12:21<09:46,  3.66it/s]

Epoch: 1, Loss: 2.965475559234619


Processing epoch 00:  56%|█████▌    | 2703/4850 [12:21<09:47,  3.66it/s]

Epoch: 1, Loss: 3.2718758583068848


Processing epoch 00:  56%|█████▌    | 2704/4850 [12:22<09:45,  3.66it/s]

Epoch: 1, Loss: 3.908966541290283


Processing epoch 00:  56%|█████▌    | 2705/4850 [12:22<09:46,  3.66it/s]

Epoch: 1, Loss: 3.094877004623413


Processing epoch 00:  56%|█████▌    | 2706/4850 [12:22<09:47,  3.65it/s]

Epoch: 1, Loss: 4.47317361831665


Processing epoch 00:  56%|█████▌    | 2707/4850 [12:22<09:55,  3.60it/s]

Epoch: 1, Loss: 3.3426971435546875


Processing epoch 00:  56%|█████▌    | 2708/4850 [12:23<10:06,  3.53it/s]

Epoch: 1, Loss: 3.7053580284118652


Processing epoch 00:  56%|█████▌    | 2709/4850 [12:23<10:14,  3.48it/s]

Epoch: 1, Loss: 3.7028074264526367


Processing epoch 00:  56%|█████▌    | 2710/4850 [12:23<10:17,  3.47it/s]

Epoch: 1, Loss: 3.7487010955810547


Processing epoch 00:  56%|█████▌    | 2711/4850 [12:24<10:18,  3.46it/s]

Epoch: 1, Loss: 4.62368106842041


Processing epoch 00:  56%|█████▌    | 2712/4850 [12:24<10:13,  3.48it/s]

Epoch: 1, Loss: 3.1445908546447754


Processing epoch 00:  56%|█████▌    | 2713/4850 [12:24<10:13,  3.48it/s]

Epoch: 1, Loss: 4.61394739151001


Processing epoch 00:  56%|█████▌    | 2714/4850 [12:25<10:16,  3.47it/s]

Epoch: 1, Loss: 3.445713996887207


Processing epoch 00:  56%|█████▌    | 2715/4850 [12:25<10:20,  3.44it/s]

Epoch: 1, Loss: 4.764575958251953


Processing epoch 00:  56%|█████▌    | 2716/4850 [12:25<10:28,  3.39it/s]

Epoch: 1, Loss: 3.874101161956787


Processing epoch 00:  56%|█████▌    | 2717/4850 [12:25<10:19,  3.44it/s]

Epoch: 1, Loss: 4.558012008666992


Processing epoch 00:  56%|█████▌    | 2718/4850 [12:26<10:07,  3.51it/s]

Epoch: 1, Loss: 3.262965202331543


Processing epoch 00:  56%|█████▌    | 2719/4850 [12:26<09:59,  3.56it/s]

Epoch: 1, Loss: 2.7547495365142822


Processing epoch 00:  56%|█████▌    | 2720/4850 [12:26<09:56,  3.57it/s]

Epoch: 1, Loss: 3.6188080310821533


Processing epoch 00:  56%|█████▌    | 2721/4850 [12:26<09:49,  3.61it/s]

Epoch: 1, Loss: 4.031076908111572


Processing epoch 00:  56%|█████▌    | 2722/4850 [12:27<09:47,  3.62it/s]

Epoch: 1, Loss: 2.841604709625244


Processing epoch 00:  56%|█████▌    | 2723/4850 [12:27<09:46,  3.63it/s]

Epoch: 1, Loss: 3.0867161750793457


Processing epoch 00:  56%|█████▌    | 2724/4850 [12:27<09:43,  3.64it/s]

Epoch: 1, Loss: 4.066112518310547


Processing epoch 00:  56%|█████▌    | 2725/4850 [12:28<09:43,  3.64it/s]

Epoch: 1, Loss: 3.0329318046569824


Processing epoch 00:  56%|█████▌    | 2726/4850 [12:28<09:42,  3.65it/s]

Epoch: 1, Loss: 3.477736234664917


Processing epoch 00:  56%|█████▌    | 2727/4850 [12:28<09:41,  3.65it/s]

Epoch: 1, Loss: 3.046146869659424


Processing epoch 00:  56%|█████▌    | 2728/4850 [12:28<09:38,  3.67it/s]

Epoch: 1, Loss: 4.187676429748535


Processing epoch 00:  56%|█████▋    | 2729/4850 [12:29<09:39,  3.66it/s]

Epoch: 1, Loss: 2.8272910118103027


Processing epoch 00:  56%|█████▋    | 2730/4850 [12:29<09:39,  3.66it/s]

Epoch: 1, Loss: 3.437497138977051


Processing epoch 00:  56%|█████▋    | 2731/4850 [12:29<09:38,  3.66it/s]

Epoch: 1, Loss: 4.18372106552124


Processing epoch 00:  56%|█████▋    | 2732/4850 [12:30<09:46,  3.61it/s]

Epoch: 1, Loss: 3.1885924339294434


Processing epoch 00:  56%|█████▋    | 2733/4850 [12:30<09:43,  3.63it/s]

Epoch: 1, Loss: 3.5276236534118652


Processing epoch 00:  56%|█████▋    | 2734/4850 [12:30<09:42,  3.63it/s]

Epoch: 1, Loss: 3.39788818359375


Processing epoch 00:  56%|█████▋    | 2735/4850 [12:30<09:42,  3.63it/s]

Epoch: 1, Loss: 4.309600830078125


Processing epoch 00:  56%|█████▋    | 2736/4850 [12:31<09:41,  3.63it/s]

Epoch: 1, Loss: 3.130016803741455


Processing epoch 00:  56%|█████▋    | 2737/4850 [12:31<09:42,  3.63it/s]

Epoch: 1, Loss: 3.305093765258789


Processing epoch 00:  56%|█████▋    | 2738/4850 [12:31<09:40,  3.64it/s]

Epoch: 1, Loss: 3.3871326446533203


Processing epoch 00:  56%|█████▋    | 2739/4850 [12:31<09:38,  3.65it/s]

Epoch: 1, Loss: 3.9464268684387207


Processing epoch 00:  56%|█████▋    | 2740/4850 [12:32<09:36,  3.66it/s]

Epoch: 1, Loss: 4.0619120597839355


Processing epoch 00:  57%|█████▋    | 2741/4850 [12:32<09:36,  3.66it/s]

Epoch: 1, Loss: 3.932347297668457


Processing epoch 00:  57%|█████▋    | 2742/4850 [12:32<09:35,  3.66it/s]

Epoch: 1, Loss: 3.3520309925079346


Processing epoch 00:  57%|█████▋    | 2743/4850 [12:33<09:36,  3.66it/s]

Epoch: 1, Loss: 2.7212562561035156


Processing epoch 00:  57%|█████▋    | 2744/4850 [12:33<09:35,  3.66it/s]

Epoch: 1, Loss: 3.118269443511963


Processing epoch 00:  57%|█████▋    | 2745/4850 [12:33<09:35,  3.66it/s]

Epoch: 1, Loss: 3.506948947906494


Processing epoch 00:  57%|█████▋    | 2746/4850 [12:33<09:32,  3.67it/s]

Epoch: 1, Loss: 4.475265026092529


Processing epoch 00:  57%|█████▋    | 2747/4850 [12:34<09:39,  3.63it/s]

Epoch: 1, Loss: 3.1081087589263916


Processing epoch 00:  57%|█████▋    | 2748/4850 [12:34<09:36,  3.64it/s]

Epoch: 1, Loss: 4.247032165527344


Processing epoch 00:  57%|█████▋    | 2749/4850 [12:34<09:32,  3.67it/s]

Epoch: 1, Loss: 4.2192606925964355


Processing epoch 00:  57%|█████▋    | 2750/4850 [12:34<09:31,  3.68it/s]

Epoch: 1, Loss: 4.402164459228516


Processing epoch 00:  57%|█████▋    | 2751/4850 [12:35<09:31,  3.67it/s]

Epoch: 1, Loss: 4.144356727600098


Processing epoch 00:  57%|█████▋    | 2752/4850 [12:35<09:31,  3.67it/s]

Epoch: 1, Loss: 3.6210241317749023


Processing epoch 00:  57%|█████▋    | 2753/4850 [12:35<09:33,  3.65it/s]

Epoch: 1, Loss: 3.1819682121276855


Processing epoch 00:  57%|█████▋    | 2754/4850 [12:36<09:43,  3.59it/s]

Epoch: 1, Loss: 3.3334827423095703


Processing epoch 00:  57%|█████▋    | 2755/4850 [12:36<09:53,  3.53it/s]

Epoch: 1, Loss: 3.5131335258483887


Processing epoch 00:  57%|█████▋    | 2756/4850 [12:36<09:51,  3.54it/s]

Epoch: 1, Loss: 3.33859920501709


Processing epoch 00:  57%|█████▋    | 2757/4850 [12:36<09:53,  3.53it/s]

Epoch: 1, Loss: 5.151036262512207


Processing epoch 00:  57%|█████▋    | 2758/4850 [12:37<10:08,  3.44it/s]

Epoch: 1, Loss: 3.306121587753296


Processing epoch 00:  57%|█████▋    | 2759/4850 [12:37<10:21,  3.37it/s]

Epoch: 1, Loss: 3.8299148082733154


Processing epoch 00:  57%|█████▋    | 2760/4850 [12:37<10:16,  3.39it/s]

Epoch: 1, Loss: 3.7131361961364746


Processing epoch 00:  57%|█████▋    | 2761/4850 [12:38<10:16,  3.39it/s]

Epoch: 1, Loss: 3.0925450325012207


Processing epoch 00:  57%|█████▋    | 2762/4850 [12:38<10:14,  3.40it/s]

Epoch: 1, Loss: 3.999912738800049


Processing epoch 00:  57%|█████▋    | 2763/4850 [12:38<10:15,  3.39it/s]

Epoch: 1, Loss: 3.2489771842956543


Processing epoch 00:  57%|█████▋    | 2764/4850 [12:38<10:06,  3.44it/s]

Epoch: 1, Loss: 2.840670585632324


Processing epoch 00:  57%|█████▋    | 2765/4850 [12:39<09:55,  3.50it/s]

Epoch: 1, Loss: 3.720444917678833


Processing epoch 00:  57%|█████▋    | 2766/4850 [12:39<09:48,  3.54it/s]

Epoch: 1, Loss: 2.708834171295166


Processing epoch 00:  57%|█████▋    | 2767/4850 [12:39<09:42,  3.58it/s]

Epoch: 1, Loss: 2.883375883102417


Processing epoch 00:  57%|█████▋    | 2768/4850 [12:40<09:34,  3.62it/s]

Epoch: 1, Loss: 4.104585647583008


Processing epoch 00:  57%|█████▋    | 2769/4850 [12:40<09:42,  3.57it/s]

Epoch: 1, Loss: 3.256037712097168


Processing epoch 00:  57%|█████▋    | 2770/4850 [12:40<09:38,  3.60it/s]

Epoch: 1, Loss: 3.768160820007324


Processing epoch 00:  57%|█████▋    | 2771/4850 [12:40<09:37,  3.60it/s]

Epoch: 1, Loss: 2.9968934059143066


Processing epoch 00:  57%|█████▋    | 2772/4850 [12:41<09:36,  3.61it/s]

Epoch: 1, Loss: 3.527073860168457


Processing epoch 00:  57%|█████▋    | 2773/4850 [12:41<09:34,  3.62it/s]

Epoch: 1, Loss: 2.873662233352661


Processing epoch 00:  57%|█████▋    | 2774/4850 [12:41<09:28,  3.65it/s]

Epoch: 1, Loss: 3.640150785446167


Processing epoch 00:  57%|█████▋    | 2775/4850 [12:41<09:28,  3.65it/s]

Epoch: 1, Loss: 4.597566604614258


Processing epoch 00:  57%|█████▋    | 2776/4850 [12:42<09:28,  3.65it/s]

Epoch: 1, Loss: 3.9805965423583984


Processing epoch 00:  57%|█████▋    | 2777/4850 [12:42<09:27,  3.65it/s]

Epoch: 1, Loss: 3.777317523956299


Processing epoch 00:  57%|█████▋    | 2778/4850 [12:42<09:28,  3.64it/s]

Epoch: 1, Loss: 4.243257999420166


Processing epoch 00:  57%|█████▋    | 2779/4850 [12:43<09:26,  3.65it/s]

Epoch: 1, Loss: 3.109847068786621


Processing epoch 00:  57%|█████▋    | 2780/4850 [12:43<09:26,  3.65it/s]

Epoch: 1, Loss: 3.989309787750244


Processing epoch 00:  57%|█████▋    | 2781/4850 [12:43<09:24,  3.67it/s]

Epoch: 1, Loss: 4.1118669509887695


Processing epoch 00:  57%|█████▋    | 2782/4850 [12:43<09:21,  3.68it/s]

Epoch: 1, Loss: 4.340530872344971


Processing epoch 00:  57%|█████▋    | 2783/4850 [12:44<09:26,  3.65it/s]

Epoch: 1, Loss: 3.2148189544677734


Processing epoch 00:  57%|█████▋    | 2784/4850 [12:44<09:31,  3.62it/s]

Epoch: 1, Loss: 3.4718964099884033


Processing epoch 00:  57%|█████▋    | 2785/4850 [12:44<09:27,  3.64it/s]

Epoch: 1, Loss: 3.5239481925964355


Processing epoch 00:  57%|█████▋    | 2786/4850 [12:45<09:26,  3.64it/s]

Epoch: 1, Loss: 3.163297653198242


Processing epoch 00:  57%|█████▋    | 2787/4850 [12:45<09:26,  3.64it/s]

Epoch: 1, Loss: 3.3524937629699707


Processing epoch 00:  57%|█████▋    | 2788/4850 [12:45<09:26,  3.64it/s]

Epoch: 1, Loss: 3.4970836639404297


Processing epoch 00:  58%|█████▊    | 2789/4850 [12:45<09:22,  3.66it/s]

Epoch: 1, Loss: 4.853053092956543


Processing epoch 00:  58%|█████▊    | 2790/4850 [12:46<09:22,  3.66it/s]

Epoch: 1, Loss: 3.034390926361084


Processing epoch 00:  58%|█████▊    | 2791/4850 [12:46<09:21,  3.67it/s]

Epoch: 1, Loss: 2.8346238136291504


Processing epoch 00:  58%|█████▊    | 2792/4850 [12:46<09:21,  3.67it/s]

Epoch: 1, Loss: 3.2010860443115234


Processing epoch 00:  58%|█████▊    | 2793/4850 [12:46<09:21,  3.66it/s]

Epoch: 1, Loss: 3.194340229034424


Processing epoch 00:  58%|█████▊    | 2794/4850 [12:47<09:21,  3.66it/s]

Epoch: 1, Loss: 3.6263294219970703


Processing epoch 00:  58%|█████▊    | 2795/4850 [12:47<09:22,  3.65it/s]

Epoch: 1, Loss: 3.3749794960021973


Processing epoch 00:  58%|█████▊    | 2796/4850 [12:47<09:19,  3.67it/s]

Epoch: 1, Loss: 3.329195022583008


Processing epoch 00:  58%|█████▊    | 2797/4850 [12:48<09:19,  3.67it/s]

Epoch: 1, Loss: 3.4469220638275146


Processing epoch 00:  58%|█████▊    | 2798/4850 [12:48<09:19,  3.67it/s]

Epoch: 1, Loss: 3.0040318965911865


Processing epoch 00:  58%|█████▊    | 2799/4850 [12:48<09:26,  3.62it/s]

Epoch: 1, Loss: 2.898789167404175


Processing epoch 00:  58%|█████▊    | 2800/4850 [12:48<09:35,  3.56it/s]

Epoch: 1, Loss: 3.03878116607666


Processing epoch 00:  58%|█████▊    | 2801/4850 [12:49<09:41,  3.52it/s]

Epoch: 1, Loss: 4.575836181640625


Processing epoch 00:  58%|█████▊    | 2802/4850 [12:49<09:39,  3.53it/s]

Epoch: 1, Loss: 3.2513632774353027


Processing epoch 00:  58%|█████▊    | 2803/4850 [12:49<09:40,  3.52it/s]

Epoch: 1, Loss: 3.6547069549560547


Processing epoch 00:  58%|█████▊    | 2804/4850 [12:50<09:41,  3.52it/s]

Epoch: 1, Loss: 3.161188840866089


Processing epoch 00:  58%|█████▊    | 2805/4850 [12:50<09:38,  3.53it/s]

Epoch: 1, Loss: 3.882319211959839


Processing epoch 00:  58%|█████▊    | 2806/4850 [12:50<09:48,  3.47it/s]

Epoch: 1, Loss: 3.5927045345306396


Processing epoch 00:  58%|█████▊    | 2807/4850 [12:50<09:58,  3.42it/s]

Epoch: 1, Loss: 3.3682050704956055


Processing epoch 00:  58%|█████▊    | 2808/4850 [12:51<10:06,  3.37it/s]

Epoch: 1, Loss: 3.5786664485931396


Processing epoch 00:  58%|█████▊    | 2809/4850 [12:51<10:12,  3.33it/s]

Epoch: 1, Loss: 3.541757106781006


Processing epoch 00:  58%|█████▊    | 2810/4850 [12:51<10:02,  3.38it/s]

Epoch: 1, Loss: 3.581979513168335


Processing epoch 00:  58%|█████▊    | 2811/4850 [12:52<09:52,  3.44it/s]

Epoch: 1, Loss: 3.2861499786376953


Processing epoch 00:  58%|█████▊    | 2812/4850 [12:52<09:42,  3.50it/s]

Epoch: 1, Loss: 3.390265941619873


Processing epoch 00:  58%|█████▊    | 2813/4850 [12:52<09:33,  3.55it/s]

Epoch: 1, Loss: 3.1781795024871826


Processing epoch 00:  58%|█████▊    | 2814/4850 [12:52<09:26,  3.60it/s]

Epoch: 1, Loss: 3.824833869934082


Processing epoch 00:  58%|█████▊    | 2815/4850 [12:53<09:22,  3.62it/s]

Epoch: 1, Loss: 4.233195781707764


Processing epoch 00:  58%|█████▊    | 2816/4850 [12:53<09:19,  3.63it/s]

Epoch: 1, Loss: 3.6995177268981934


Processing epoch 00:  58%|█████▊    | 2817/4850 [12:53<09:17,  3.65it/s]

Epoch: 1, Loss: 3.9238719940185547


Processing epoch 00:  58%|█████▊    | 2818/4850 [12:53<09:17,  3.64it/s]

Epoch: 1, Loss: 2.570885419845581


Processing epoch 00:  58%|█████▊    | 2819/4850 [12:54<09:15,  3.65it/s]

Epoch: 1, Loss: 3.1284260749816895


Processing epoch 00:  58%|█████▊    | 2820/4850 [12:54<09:13,  3.67it/s]

Epoch: 1, Loss: 5.351006984710693


Processing epoch 00:  58%|█████▊    | 2821/4850 [12:54<09:15,  3.65it/s]

Epoch: 1, Loss: 3.7093088626861572


Processing epoch 00:  58%|█████▊    | 2822/4850 [12:55<09:16,  3.65it/s]

Epoch: 1, Loss: 4.313313961029053


Processing epoch 00:  58%|█████▊    | 2823/4850 [12:55<09:14,  3.65it/s]

Epoch: 1, Loss: 4.301684856414795


Processing epoch 00:  58%|█████▊    | 2824/4850 [12:55<09:14,  3.65it/s]

Epoch: 1, Loss: 3.1336731910705566


Processing epoch 00:  58%|█████▊    | 2825/4850 [12:55<09:13,  3.66it/s]

Epoch: 1, Loss: 2.7762653827667236


Processing epoch 00:  58%|█████▊    | 2826/4850 [12:56<09:12,  3.66it/s]

Epoch: 1, Loss: 2.8134326934814453


Processing epoch 00:  58%|█████▊    | 2827/4850 [12:56<09:12,  3.66it/s]

Epoch: 1, Loss: 3.2308716773986816


Processing epoch 00:  58%|█████▊    | 2828/4850 [12:56<09:09,  3.68it/s]

Epoch: 1, Loss: 3.239628553390503


Processing epoch 00:  58%|█████▊    | 2829/4850 [12:56<09:10,  3.67it/s]

Epoch: 1, Loss: 3.1924476623535156


Processing epoch 00:  58%|█████▊    | 2830/4850 [12:57<09:10,  3.67it/s]

Epoch: 1, Loss: 3.118034839630127


Processing epoch 00:  58%|█████▊    | 2831/4850 [12:57<09:08,  3.68it/s]

Epoch: 1, Loss: 3.8723857402801514


Processing epoch 00:  58%|█████▊    | 2832/4850 [12:57<09:09,  3.67it/s]

Epoch: 1, Loss: 3.471111297607422


Processing epoch 00:  58%|█████▊    | 2833/4850 [12:58<09:09,  3.67it/s]

Epoch: 1, Loss: 4.484756946563721


Processing epoch 00:  58%|█████▊    | 2834/4850 [12:58<09:07,  3.69it/s]

Epoch: 1, Loss: 3.9531867504119873


Processing epoch 00:  58%|█████▊    | 2835/4850 [12:58<09:07,  3.68it/s]

Epoch: 1, Loss: 4.590597152709961


Processing epoch 00:  58%|█████▊    | 2836/4850 [12:58<09:11,  3.65it/s]

Epoch: 1, Loss: 3.031453847885132


Processing epoch 00:  58%|█████▊    | 2837/4850 [12:59<09:11,  3.65it/s]

Epoch: 1, Loss: 3.2524561882019043


Processing epoch 00:  59%|█████▊    | 2838/4850 [12:59<09:12,  3.64it/s]

Epoch: 1, Loss: 2.933147430419922


Processing epoch 00:  59%|█████▊    | 2839/4850 [12:59<09:11,  3.65it/s]

Epoch: 1, Loss: 3.296962261199951


Processing epoch 00:  59%|█████▊    | 2840/4850 [12:59<09:09,  3.66it/s]

Epoch: 1, Loss: 4.334127902984619


Processing epoch 00:  59%|█████▊    | 2841/4850 [13:00<09:12,  3.63it/s]

Epoch: 1, Loss: 3.4199256896972656


Processing epoch 00:  59%|█████▊    | 2842/4850 [13:00<09:13,  3.63it/s]

Epoch: 1, Loss: 3.552945613861084


Processing epoch 00:  59%|█████▊    | 2843/4850 [13:00<09:11,  3.64it/s]

Epoch: 1, Loss: 4.120450019836426


Processing epoch 00:  59%|█████▊    | 2844/4850 [13:01<09:12,  3.63it/s]

Epoch: 1, Loss: 3.1874160766601562


Processing epoch 00:  59%|█████▊    | 2845/4850 [13:01<09:11,  3.64it/s]

Epoch: 1, Loss: 4.024379730224609


Processing epoch 00:  59%|█████▊    | 2846/4850 [13:01<09:09,  3.65it/s]

Epoch: 1, Loss: 2.757669687271118


Processing epoch 00:  59%|█████▊    | 2847/4850 [13:01<09:07,  3.66it/s]

Epoch: 1, Loss: 3.5421886444091797


Processing epoch 00:  59%|█████▊    | 2848/4850 [13:02<09:19,  3.58it/s]

Epoch: 1, Loss: 3.584184408187866


Processing epoch 00:  59%|█████▊    | 2849/4850 [13:02<09:29,  3.51it/s]

Epoch: 1, Loss: 3.1460201740264893


Processing epoch 00:  59%|█████▉    | 2850/4850 [13:02<09:37,  3.46it/s]

Epoch: 1, Loss: 3.6517107486724854


Processing epoch 00:  59%|█████▉    | 2851/4850 [13:03<09:33,  3.48it/s]

Epoch: 1, Loss: 4.02557897567749


Processing epoch 00:  59%|█████▉    | 2852/4850 [13:03<09:27,  3.52it/s]

Epoch: 1, Loss: 4.680178165435791


Processing epoch 00:  59%|█████▉    | 2853/4850 [13:03<09:24,  3.54it/s]

Epoch: 1, Loss: 3.2320213317871094


Processing epoch 00:  59%|█████▉    | 2854/4850 [13:03<09:34,  3.47it/s]

Epoch: 1, Loss: 3.2748072147369385


Processing epoch 00:  59%|█████▉    | 2855/4850 [13:04<09:43,  3.42it/s]

Epoch: 1, Loss: 3.4189229011535645


Processing epoch 00:  59%|█████▉    | 2856/4850 [13:04<09:43,  3.42it/s]

Epoch: 1, Loss: 3.3291540145874023


Processing epoch 00:  59%|█████▉    | 2857/4850 [13:04<09:42,  3.42it/s]

Epoch: 1, Loss: 3.210188388824463


Processing epoch 00:  59%|█████▉    | 2858/4850 [13:05<09:52,  3.36it/s]

Epoch: 1, Loss: 3.3284246921539307


Processing epoch 00:  59%|█████▉    | 2859/4850 [13:05<09:43,  3.41it/s]

Epoch: 1, Loss: 3.2956719398498535


Processing epoch 00:  59%|█████▉    | 2860/4850 [13:05<09:30,  3.49it/s]

Epoch: 1, Loss: 2.9851627349853516


Processing epoch 00:  59%|█████▉    | 2861/4850 [13:05<09:19,  3.55it/s]

Epoch: 1, Loss: 4.005552291870117


Processing epoch 00:  59%|█████▉    | 2862/4850 [13:06<09:19,  3.56it/s]

Epoch: 1, Loss: 3.1622719764709473


Processing epoch 00:  59%|█████▉    | 2863/4850 [13:06<09:15,  3.58it/s]

Epoch: 1, Loss: 4.124429702758789


Processing epoch 00:  59%|█████▉    | 2864/4850 [13:06<09:10,  3.61it/s]

Epoch: 1, Loss: 3.787900924682617


Processing epoch 00:  59%|█████▉    | 2865/4850 [13:07<09:09,  3.61it/s]

Epoch: 1, Loss: 3.260723114013672


Processing epoch 00:  59%|█████▉    | 2866/4850 [13:07<09:08,  3.62it/s]

Epoch: 1, Loss: 3.6857402324676514


Processing epoch 00:  59%|█████▉    | 2867/4850 [13:07<09:06,  3.63it/s]

Epoch: 1, Loss: 2.5449411869049072


Processing epoch 00:  59%|█████▉    | 2868/4850 [13:07<09:06,  3.63it/s]

Epoch: 1, Loss: 3.330962657928467


Processing epoch 00:  59%|█████▉    | 2869/4850 [13:08<09:04,  3.64it/s]

Epoch: 1, Loss: 2.8283984661102295


Processing epoch 00:  59%|█████▉    | 2870/4850 [13:08<09:03,  3.64it/s]

Epoch: 1, Loss: 3.087919235229492


Processing epoch 00:  59%|█████▉    | 2871/4850 [13:08<09:04,  3.63it/s]

Epoch: 1, Loss: 3.4318010807037354


Processing epoch 00:  59%|█████▉    | 2872/4850 [13:08<09:04,  3.63it/s]

Epoch: 1, Loss: 2.7132997512817383


Processing epoch 00:  59%|█████▉    | 2873/4850 [13:09<09:11,  3.58it/s]

Epoch: 1, Loss: 2.9777441024780273


Processing epoch 00:  59%|█████▉    | 2874/4850 [13:09<09:08,  3.60it/s]

Epoch: 1, Loss: 3.6427524089813232


Processing epoch 00:  59%|█████▉    | 2875/4850 [13:09<09:06,  3.62it/s]

Epoch: 1, Loss: 3.271650791168213


Processing epoch 00:  59%|█████▉    | 2876/4850 [13:10<09:03,  3.63it/s]

Epoch: 1, Loss: 4.679305553436279


Processing epoch 00:  59%|█████▉    | 2877/4850 [13:10<09:02,  3.64it/s]

Epoch: 1, Loss: 3.923326015472412


Processing epoch 00:  59%|█████▉    | 2878/4850 [13:10<09:00,  3.65it/s]

Epoch: 1, Loss: 3.198366641998291


Processing epoch 00:  59%|█████▉    | 2879/4850 [13:10<08:58,  3.66it/s]

Epoch: 1, Loss: 3.5107874870300293


Processing epoch 00:  59%|█████▉    | 2880/4850 [13:11<09:00,  3.65it/s]

Epoch: 1, Loss: 3.8812787532806396


Processing epoch 00:  59%|█████▉    | 2881/4850 [13:11<08:58,  3.65it/s]

Epoch: 1, Loss: 3.2979674339294434


Processing epoch 00:  59%|█████▉    | 2882/4850 [13:11<08:58,  3.65it/s]

Epoch: 1, Loss: 5.0419793128967285


Processing epoch 00:  59%|█████▉    | 2883/4850 [13:11<08:56,  3.67it/s]

Epoch: 1, Loss: 3.3377230167388916


Processing epoch 00:  59%|█████▉    | 2884/4850 [13:12<08:58,  3.65it/s]

Epoch: 1, Loss: 4.087054252624512


Processing epoch 00:  59%|█████▉    | 2885/4850 [13:12<08:59,  3.64it/s]

Epoch: 1, Loss: 3.5242538452148438


Processing epoch 00:  60%|█████▉    | 2886/4850 [13:12<09:15,  3.53it/s]

Epoch: 1, Loss: 3.955023765563965


Processing epoch 00:  60%|█████▉    | 2887/4850 [13:13<09:23,  3.48it/s]

Epoch: 1, Loss: 3.975893974304199


Processing epoch 00:  60%|█████▉    | 2888/4850 [13:13<09:21,  3.50it/s]

Epoch: 1, Loss: 3.805845260620117


Processing epoch 00:  60%|█████▉    | 2889/4850 [13:13<09:21,  3.49it/s]

Epoch: 1, Loss: 4.385811805725098


Processing epoch 00:  60%|█████▉    | 2890/4850 [13:14<09:18,  3.51it/s]

Epoch: 1, Loss: 4.157236576080322


Processing epoch 00:  60%|█████▉    | 2891/4850 [13:14<09:20,  3.50it/s]

Epoch: 1, Loss: 3.7659759521484375


Processing epoch 00:  60%|█████▉    | 2892/4850 [13:14<09:25,  3.46it/s]

Epoch: 1, Loss: 3.4586544036865234


Processing epoch 00:  60%|█████▉    | 2893/4850 [13:14<09:28,  3.44it/s]

Epoch: 1, Loss: 4.1599345207214355


Processing epoch 00:  60%|█████▉    | 2894/4850 [13:15<09:36,  3.39it/s]

Epoch: 1, Loss: 3.491640329360962


Processing epoch 00:  60%|█████▉    | 2895/4850 [13:15<09:44,  3.35it/s]

Epoch: 1, Loss: 3.5823140144348145


Processing epoch 00:  60%|█████▉    | 2896/4850 [13:15<09:49,  3.32it/s]

Epoch: 1, Loss: 3.4131453037261963


Processing epoch 00:  60%|█████▉    | 2897/4850 [13:16<09:43,  3.35it/s]

Epoch: 1, Loss: 3.5514473915100098


Processing epoch 00:  60%|█████▉    | 2898/4850 [13:16<09:34,  3.40it/s]

Epoch: 1, Loss: 3.190197706222534


Processing epoch 00:  60%|█████▉    | 2899/4850 [13:16<09:26,  3.45it/s]

Epoch: 1, Loss: 4.057349681854248


Processing epoch 00:  60%|█████▉    | 2900/4850 [13:16<09:37,  3.37it/s]

Epoch: 1, Loss: 3.617049217224121


Processing epoch 00:  60%|█████▉    | 2901/4850 [13:17<09:38,  3.37it/s]

Epoch: 1, Loss: 3.5662500858306885


Processing epoch 00:  60%|█████▉    | 2902/4850 [13:17<09:42,  3.35it/s]

Epoch: 1, Loss: 3.244485855102539


Processing epoch 00:  60%|█████▉    | 2903/4850 [13:17<09:46,  3.32it/s]

Epoch: 1, Loss: 3.227931022644043


Processing epoch 00:  60%|█████▉    | 2904/4850 [13:18<09:35,  3.38it/s]

Epoch: 1, Loss: 4.72657585144043


Processing epoch 00:  60%|█████▉    | 2905/4850 [13:18<09:30,  3.41it/s]

Epoch: 1, Loss: 3.267862319946289


Processing epoch 00:  60%|█████▉    | 2906/4850 [13:18<09:37,  3.36it/s]

Epoch: 1, Loss: 4.1071014404296875


Processing epoch 00:  60%|█████▉    | 2907/4850 [13:19<09:25,  3.43it/s]

Epoch: 1, Loss: 3.055830955505371


Processing epoch 00:  60%|█████▉    | 2908/4850 [13:19<09:15,  3.49it/s]

Epoch: 1, Loss: 3.065749168395996


Processing epoch 00:  60%|█████▉    | 2909/4850 [13:19<09:10,  3.53it/s]

Epoch: 1, Loss: 3.1274185180664062


Processing epoch 00:  60%|██████    | 2910/4850 [13:19<09:06,  3.55it/s]

Epoch: 1, Loss: 3.44063663482666


Processing epoch 00:  60%|██████    | 2911/4850 [13:20<09:02,  3.58it/s]

Epoch: 1, Loss: 3.5848841667175293


Processing epoch 00:  60%|██████    | 2912/4850 [13:20<08:58,  3.60it/s]

Epoch: 1, Loss: 4.173954963684082


Processing epoch 00:  60%|██████    | 2913/4850 [13:20<08:59,  3.59it/s]

Epoch: 1, Loss: 3.8119313716888428


Processing epoch 00:  60%|██████    | 2914/4850 [13:20<08:55,  3.62it/s]

Epoch: 1, Loss: 3.084273338317871


Processing epoch 00:  60%|██████    | 2915/4850 [13:21<08:55,  3.61it/s]

Epoch: 1, Loss: 3.3810696601867676


Processing epoch 00:  60%|██████    | 2916/4850 [13:21<08:52,  3.63it/s]

Epoch: 1, Loss: 3.649240016937256


Processing epoch 00:  60%|██████    | 2917/4850 [13:21<08:49,  3.65it/s]

Epoch: 1, Loss: 3.1979241371154785


Processing epoch 00:  60%|██████    | 2918/4850 [13:22<08:49,  3.65it/s]

Epoch: 1, Loss: 2.813699245452881


Processing epoch 00:  60%|██████    | 2919/4850 [13:22<08:51,  3.64it/s]

Epoch: 1, Loss: 2.8160598278045654


Processing epoch 00:  60%|██████    | 2920/4850 [13:22<08:49,  3.65it/s]

Epoch: 1, Loss: 3.9035251140594482


Processing epoch 00:  60%|██████    | 2921/4850 [13:22<08:48,  3.65it/s]

Epoch: 1, Loss: 3.4559555053710938


Processing epoch 00:  60%|██████    | 2922/4850 [13:23<08:47,  3.66it/s]

Epoch: 1, Loss: 3.7480969429016113


Processing epoch 00:  60%|██████    | 2923/4850 [13:23<08:47,  3.66it/s]

Epoch: 1, Loss: 3.745102882385254


Processing epoch 00:  60%|██████    | 2924/4850 [13:23<08:47,  3.65it/s]

Epoch: 1, Loss: 2.9112441539764404


Processing epoch 00:  60%|██████    | 2925/4850 [13:23<08:44,  3.67it/s]

Epoch: 1, Loss: 3.9058191776275635


Processing epoch 00:  60%|██████    | 2926/4850 [13:24<08:45,  3.66it/s]

Epoch: 1, Loss: 3.4580767154693604


Processing epoch 00:  60%|██████    | 2927/4850 [13:24<08:45,  3.66it/s]

Epoch: 1, Loss: 3.30342960357666


Processing epoch 00:  60%|██████    | 2928/4850 [13:24<08:49,  3.63it/s]

Epoch: 1, Loss: 4.251288414001465


Processing epoch 00:  60%|██████    | 2929/4850 [13:25<08:49,  3.63it/s]

Epoch: 1, Loss: 3.0175716876983643


Processing epoch 00:  60%|██████    | 2930/4850 [13:25<08:48,  3.63it/s]

Epoch: 1, Loss: 3.0203700065612793


Processing epoch 00:  60%|██████    | 2931/4850 [13:25<08:46,  3.65it/s]

Epoch: 1, Loss: 3.501351833343506


Processing epoch 00:  60%|██████    | 2932/4850 [13:25<08:47,  3.64it/s]

Epoch: 1, Loss: 3.282069683074951


Processing epoch 00:  60%|██████    | 2933/4850 [13:26<08:45,  3.65it/s]

Epoch: 1, Loss: 3.7326607704162598


Processing epoch 00:  60%|██████    | 2934/4850 [13:26<08:45,  3.64it/s]

Epoch: 1, Loss: 3.528757333755493


Processing epoch 00:  61%|██████    | 2935/4850 [13:26<08:44,  3.65it/s]

Epoch: 1, Loss: 3.589077949523926


Processing epoch 00:  61%|██████    | 2936/4850 [13:26<08:44,  3.65it/s]

Epoch: 1, Loss: 3.2121505737304688


Processing epoch 00:  61%|██████    | 2937/4850 [13:27<08:46,  3.64it/s]

Epoch: 1, Loss: 2.76121187210083


Processing epoch 00:  61%|██████    | 2938/4850 [13:27<08:45,  3.64it/s]

Epoch: 1, Loss: 4.054235458374023


Processing epoch 00:  61%|██████    | 2939/4850 [13:27<08:43,  3.65it/s]

Epoch: 1, Loss: 3.652599334716797


Processing epoch 00:  61%|██████    | 2940/4850 [13:28<08:43,  3.65it/s]

Epoch: 1, Loss: 3.5648550987243652


Processing epoch 00:  61%|██████    | 2941/4850 [13:28<08:41,  3.66it/s]

Epoch: 1, Loss: 2.781212568283081


Processing epoch 00:  61%|██████    | 2942/4850 [13:28<08:41,  3.66it/s]

Epoch: 1, Loss: 2.957080841064453


Processing epoch 00:  61%|██████    | 2943/4850 [13:28<09:01,  3.52it/s]

Epoch: 1, Loss: 3.3368091583251953


Processing epoch 00:  61%|██████    | 2944/4850 [13:29<08:58,  3.54it/s]

Epoch: 1, Loss: 3.2093136310577393


Processing epoch 00:  61%|██████    | 2945/4850 [13:29<09:00,  3.52it/s]

Epoch: 1, Loss: 3.19850754737854


Processing epoch 00:  61%|██████    | 2946/4850 [13:29<09:03,  3.50it/s]

Epoch: 1, Loss: 3.4624574184417725


Processing epoch 00:  61%|██████    | 2947/4850 [13:30<09:01,  3.52it/s]

Epoch: 1, Loss: 3.3574366569519043


Processing epoch 00:  61%|██████    | 2948/4850 [13:30<09:09,  3.46it/s]

Epoch: 1, Loss: 3.549650192260742


Processing epoch 00:  61%|██████    | 2949/4850 [13:30<09:05,  3.49it/s]

Epoch: 1, Loss: 3.1409969329833984


Processing epoch 00:  61%|██████    | 2950/4850 [13:30<09:10,  3.45it/s]

Epoch: 1, Loss: 3.0543830394744873


Processing epoch 00:  61%|██████    | 2951/4850 [13:31<09:07,  3.47it/s]

Epoch: 1, Loss: 3.3777623176574707


Processing epoch 00:  61%|██████    | 2952/4850 [13:31<09:07,  3.47it/s]

Epoch: 1, Loss: 3.627760887145996


Processing epoch 00:  61%|██████    | 2953/4850 [13:31<09:15,  3.41it/s]

Epoch: 1, Loss: 3.3620457649230957


Processing epoch 00:  61%|██████    | 2954/4850 [13:32<09:09,  3.45it/s]

Epoch: 1, Loss: 4.652832984924316


Processing epoch 00:  61%|██████    | 2955/4850 [13:32<08:59,  3.51it/s]

Epoch: 1, Loss: 4.4817705154418945


Processing epoch 00:  61%|██████    | 2956/4850 [13:32<08:53,  3.55it/s]

Epoch: 1, Loss: 4.385267734527588


Processing epoch 00:  61%|██████    | 2957/4850 [13:32<08:45,  3.60it/s]

Epoch: 1, Loss: 3.8542840480804443


Processing epoch 00:  61%|██████    | 2958/4850 [13:33<08:42,  3.62it/s]

Epoch: 1, Loss: 3.2628636360168457


Processing epoch 00:  61%|██████    | 2959/4850 [13:33<08:40,  3.63it/s]

Epoch: 1, Loss: 2.8489246368408203


Processing epoch 00:  61%|██████    | 2960/4850 [13:33<08:40,  3.63it/s]

Epoch: 1, Loss: 3.587350368499756


Processing epoch 00:  61%|██████    | 2961/4850 [13:34<08:38,  3.64it/s]

Epoch: 1, Loss: 2.411888360977173


Processing epoch 00:  61%|██████    | 2962/4850 [13:34<08:37,  3.65it/s]

Epoch: 1, Loss: 3.1367452144622803


Processing epoch 00:  61%|██████    | 2963/4850 [13:34<08:37,  3.65it/s]

Epoch: 1, Loss: 2.8918027877807617


Processing epoch 00:  61%|██████    | 2964/4850 [13:34<08:35,  3.66it/s]

Epoch: 1, Loss: 3.0387234687805176


Processing epoch 00:  61%|██████    | 2965/4850 [13:35<08:36,  3.65it/s]

Epoch: 1, Loss: 3.899689197540283


Processing epoch 00:  61%|██████    | 2966/4850 [13:35<08:37,  3.64it/s]

Epoch: 1, Loss: 3.010958433151245


Processing epoch 00:  61%|██████    | 2967/4850 [13:35<08:36,  3.65it/s]

Epoch: 1, Loss: 3.6079459190368652


Processing epoch 00:  61%|██████    | 2968/4850 [13:35<08:35,  3.65it/s]

Epoch: 1, Loss: 3.333582639694214


Processing epoch 00:  61%|██████    | 2969/4850 [13:36<08:38,  3.63it/s]

Epoch: 1, Loss: 3.5526556968688965


Processing epoch 00:  61%|██████    | 2970/4850 [13:36<08:35,  3.65it/s]

Epoch: 1, Loss: 4.843573093414307


Processing epoch 00:  61%|██████▏   | 2971/4850 [13:36<08:35,  3.64it/s]

Epoch: 1, Loss: 2.9343624114990234


Processing epoch 00:  61%|██████▏   | 2972/4850 [13:37<08:34,  3.65it/s]

Epoch: 1, Loss: 3.239759683609009


Processing epoch 00:  61%|██████▏   | 2973/4850 [13:37<08:34,  3.65it/s]

Epoch: 1, Loss: 3.1639699935913086


Processing epoch 00:  61%|██████▏   | 2974/4850 [13:37<08:37,  3.62it/s]

Epoch: 1, Loss: 3.190274715423584


Processing epoch 00:  61%|██████▏   | 2975/4850 [13:37<08:34,  3.65it/s]

Epoch: 1, Loss: 4.19697380065918


Processing epoch 00:  61%|██████▏   | 2976/4850 [13:38<08:36,  3.63it/s]

Epoch: 1, Loss: 3.4829492568969727


Processing epoch 00:  61%|██████▏   | 2977/4850 [13:38<08:36,  3.62it/s]

Epoch: 1, Loss: 3.183074474334717


Processing epoch 00:  61%|██████▏   | 2978/4850 [13:38<08:34,  3.64it/s]

Epoch: 1, Loss: 3.806211233139038


Processing epoch 00:  61%|██████▏   | 2979/4850 [13:38<08:34,  3.64it/s]

Epoch: 1, Loss: 3.5083179473876953


Processing epoch 00:  61%|██████▏   | 2980/4850 [13:39<08:33,  3.64it/s]

Epoch: 1, Loss: 4.470766067504883


Processing epoch 00:  61%|██████▏   | 2981/4850 [13:39<08:32,  3.64it/s]

Epoch: 1, Loss: 3.362060070037842


Processing epoch 00:  61%|██████▏   | 2982/4850 [13:39<08:29,  3.66it/s]

Epoch: 1, Loss: 4.1712164878845215


Processing epoch 00:  62%|██████▏   | 2983/4850 [13:40<08:29,  3.66it/s]

Epoch: 1, Loss: 3.211003303527832


Processing epoch 00:  62%|██████▏   | 2984/4850 [13:40<08:33,  3.63it/s]

Epoch: 1, Loss: 3.534402847290039


Processing epoch 00:  62%|██████▏   | 2985/4850 [13:40<08:33,  3.63it/s]

Epoch: 1, Loss: 2.7817625999450684


Processing epoch 00:  62%|██████▏   | 2986/4850 [13:40<08:32,  3.63it/s]

Epoch: 1, Loss: 3.6916491985321045


Processing epoch 00:  62%|██████▏   | 2987/4850 [13:41<08:30,  3.65it/s]

Epoch: 1, Loss: 2.8853201866149902


Processing epoch 00:  62%|██████▏   | 2988/4850 [13:41<08:27,  3.67it/s]

Epoch: 1, Loss: 4.201540470123291


Processing epoch 00:  62%|██████▏   | 2989/4850 [13:41<08:25,  3.68it/s]

Epoch: 1, Loss: 3.5280988216400146


Processing epoch 00:  62%|██████▏   | 2990/4850 [13:41<08:37,  3.60it/s]

Epoch: 1, Loss: 3.0466670989990234


Processing epoch 00:  62%|██████▏   | 2991/4850 [13:42<08:42,  3.56it/s]

Epoch: 1, Loss: 3.688513994216919


Processing epoch 00:  62%|██████▏   | 2992/4850 [13:42<08:51,  3.49it/s]

Epoch: 1, Loss: 3.1209568977355957


Processing epoch 00:  62%|██████▏   | 2993/4850 [13:42<08:50,  3.50it/s]

Epoch: 1, Loss: 3.626739740371704


Processing epoch 00:  62%|██████▏   | 2994/4850 [13:43<08:47,  3.52it/s]

Epoch: 1, Loss: 4.341172218322754


Processing epoch 00:  62%|██████▏   | 2995/4850 [13:43<08:51,  3.49it/s]

Epoch: 1, Loss: 2.506904125213623


Processing epoch 00:  62%|██████▏   | 2996/4850 [13:43<09:01,  3.43it/s]

Epoch: 1, Loss: 3.511842727661133


Processing epoch 00:  62%|██████▏   | 2997/4850 [13:44<09:01,  3.42it/s]

Epoch: 1, Loss: 4.315189361572266


Processing epoch 00:  62%|██████▏   | 2998/4850 [13:44<09:14,  3.34it/s]

Epoch: 1, Loss: 3.2844643592834473


Processing epoch 00:  62%|██████▏   | 2999/4850 [13:44<09:13,  3.34it/s]

Epoch: 1, Loss: 3.864830255508423


Processing epoch 00:  62%|██████▏   | 3000/4850 [13:44<09:16,  3.32it/s]

Epoch: 1, Loss: 3.146883964538574


Processing epoch 00:  62%|██████▏   | 3001/4850 [13:45<09:01,  3.41it/s]

Epoch: 1, Loss: 3.4323129653930664


Processing epoch 00:  62%|██████▏   | 3002/4850 [13:45<08:50,  3.49it/s]

Epoch: 1, Loss: 3.6577486991882324


Processing epoch 00:  62%|██████▏   | 3003/4850 [13:45<08:44,  3.52it/s]

Epoch: 1, Loss: 3.3838701248168945


Processing epoch 00:  62%|██████▏   | 3004/4850 [13:46<08:39,  3.56it/s]

Epoch: 1, Loss: 3.4351415634155273


Processing epoch 00:  62%|██████▏   | 3005/4850 [13:46<08:34,  3.59it/s]

Epoch: 1, Loss: 2.973114013671875


Processing epoch 00:  62%|██████▏   | 3006/4850 [13:46<08:35,  3.57it/s]

Epoch: 1, Loss: 3.285295009613037


Processing epoch 00:  62%|██████▏   | 3007/4850 [13:46<08:31,  3.61it/s]

Epoch: 1, Loss: 3.3843185901641846


Processing epoch 00:  62%|██████▏   | 3008/4850 [13:47<08:35,  3.57it/s]

Epoch: 1, Loss: 3.5016424655914307


Processing epoch 00:  62%|██████▏   | 3009/4850 [13:47<08:32,  3.59it/s]

Epoch: 1, Loss: 3.426769733428955


Processing epoch 00:  62%|██████▏   | 3010/4850 [13:47<08:28,  3.62it/s]

Epoch: 1, Loss: 3.6154446601867676


Processing epoch 00:  62%|██████▏   | 3011/4850 [13:47<08:26,  3.63it/s]

Epoch: 1, Loss: 3.4552955627441406


Processing epoch 00:  62%|██████▏   | 3012/4850 [13:48<08:26,  3.63it/s]

Epoch: 1, Loss: 2.795846700668335


Processing epoch 00:  62%|██████▏   | 3013/4850 [13:48<08:26,  3.63it/s]

Epoch: 1, Loss: 3.337681293487549


Processing epoch 00:  62%|██████▏   | 3014/4850 [13:48<08:26,  3.62it/s]

Epoch: 1, Loss: 3.1335599422454834


Processing epoch 00:  62%|██████▏   | 3015/4850 [13:49<08:28,  3.61it/s]

Epoch: 1, Loss: 3.382307529449463


Processing epoch 00:  62%|██████▏   | 3016/4850 [13:49<08:24,  3.63it/s]

Epoch: 1, Loss: 4.495884895324707


Processing epoch 00:  62%|██████▏   | 3017/4850 [13:49<08:30,  3.59it/s]

Epoch: 1, Loss: 4.416853427886963


Processing epoch 00:  62%|██████▏   | 3018/4850 [13:49<08:26,  3.62it/s]

Epoch: 1, Loss: 3.72613263130188


Processing epoch 00:  62%|██████▏   | 3019/4850 [13:50<08:25,  3.62it/s]

Epoch: 1, Loss: 3.1740288734436035


Processing epoch 00:  62%|██████▏   | 3020/4850 [13:50<08:25,  3.62it/s]

Epoch: 1, Loss: 3.4046120643615723


Processing epoch 00:  62%|██████▏   | 3021/4850 [13:50<08:23,  3.63it/s]

Epoch: 1, Loss: 4.0294084548950195


Processing epoch 00:  62%|██████▏   | 3022/4850 [13:51<08:23,  3.63it/s]

Epoch: 1, Loss: 3.282589912414551


Processing epoch 00:  62%|██████▏   | 3023/4850 [13:51<08:22,  3.64it/s]

Epoch: 1, Loss: 3.5715250968933105


Processing epoch 00:  62%|██████▏   | 3024/4850 [13:51<08:23,  3.63it/s]

Epoch: 1, Loss: 2.996741771697998


Processing epoch 00:  62%|██████▏   | 3025/4850 [13:51<08:20,  3.65it/s]

Epoch: 1, Loss: 2.945540428161621


Processing epoch 00:  62%|██████▏   | 3026/4850 [13:52<08:23,  3.62it/s]

Epoch: 1, Loss: 2.972594738006592


Processing epoch 00:  62%|██████▏   | 3027/4850 [13:52<08:21,  3.64it/s]

Epoch: 1, Loss: 3.5970892906188965


Processing epoch 00:  62%|██████▏   | 3028/4850 [13:52<08:20,  3.64it/s]

Epoch: 1, Loss: 3.612316131591797


Processing epoch 00:  62%|██████▏   | 3029/4850 [13:52<08:19,  3.65it/s]

Epoch: 1, Loss: 3.331531524658203


Processing epoch 00:  62%|██████▏   | 3030/4850 [13:53<08:19,  3.64it/s]

Epoch: 1, Loss: 2.4286141395568848


Processing epoch 00:  62%|██████▏   | 3031/4850 [13:53<08:17,  3.66it/s]

Epoch: 1, Loss: 3.3820643424987793


Processing epoch 00:  63%|██████▎   | 3032/4850 [13:53<08:19,  3.64it/s]

Epoch: 1, Loss: 3.8233389854431152


Processing epoch 00:  63%|██████▎   | 3033/4850 [13:54<08:17,  3.65it/s]

Epoch: 1, Loss: 2.9090535640716553


Processing epoch 00:  63%|██████▎   | 3034/4850 [13:54<08:27,  3.58it/s]

Epoch: 1, Loss: 4.467004299163818


Processing epoch 00:  63%|██████▎   | 3035/4850 [13:54<08:22,  3.61it/s]

Epoch: 1, Loss: 4.173240661621094


Processing epoch 00:  63%|██████▎   | 3036/4850 [13:54<08:21,  3.62it/s]

Epoch: 1, Loss: 3.575075387954712


Processing epoch 00:  63%|██████▎   | 3037/4850 [13:55<08:31,  3.55it/s]

Epoch: 1, Loss: 3.671713352203369


Processing epoch 00:  63%|██████▎   | 3038/4850 [13:55<08:36,  3.51it/s]

Epoch: 1, Loss: 3.3726110458374023


Processing epoch 00:  63%|██████▎   | 3039/4850 [13:55<08:41,  3.47it/s]

Epoch: 1, Loss: 3.3662972450256348


Processing epoch 00:  63%|██████▎   | 3040/4850 [13:56<08:44,  3.45it/s]

Epoch: 1, Loss: 3.1777098178863525


Processing epoch 00:  63%|██████▎   | 3041/4850 [13:56<08:42,  3.46it/s]

Epoch: 1, Loss: 3.5080208778381348


Processing epoch 00:  63%|██████▎   | 3042/4850 [13:56<08:48,  3.42it/s]

Epoch: 1, Loss: 2.8805246353149414


Processing epoch 00:  63%|██████▎   | 3043/4850 [13:56<08:51,  3.40it/s]

Epoch: 1, Loss: 3.4510340690612793


Processing epoch 00:  63%|██████▎   | 3044/4850 [13:57<08:50,  3.40it/s]

Epoch: 1, Loss: 3.3289270401000977


Processing epoch 00:  63%|██████▎   | 3045/4850 [13:57<08:48,  3.41it/s]

Epoch: 1, Loss: 3.239071846008301


Processing epoch 00:  63%|██████▎   | 3046/4850 [13:57<08:44,  3.44it/s]

Epoch: 1, Loss: 3.235954999923706


Processing epoch 00:  63%|██████▎   | 3047/4850 [13:58<08:53,  3.38it/s]

Epoch: 1, Loss: 3.4895176887512207


Processing epoch 00:  63%|██████▎   | 3048/4850 [13:58<08:41,  3.45it/s]

Epoch: 1, Loss: 3.2145538330078125


Processing epoch 00:  63%|██████▎   | 3049/4850 [13:58<08:31,  3.52it/s]

Epoch: 1, Loss: 3.672163963317871


Processing epoch 00:  63%|██████▎   | 3050/4850 [13:58<08:25,  3.56it/s]

Epoch: 1, Loss: 3.2230958938598633


Processing epoch 00:  63%|██████▎   | 3051/4850 [13:59<08:21,  3.59it/s]

Epoch: 1, Loss: 3.3858230113983154


Processing epoch 00:  63%|██████▎   | 3052/4850 [13:59<08:17,  3.62it/s]

Epoch: 1, Loss: 2.788736343383789


Processing epoch 00:  63%|██████▎   | 3053/4850 [13:59<08:15,  3.62it/s]

Epoch: 1, Loss: 3.0986063480377197


Processing epoch 00:  63%|██████▎   | 3054/4850 [14:00<08:19,  3.60it/s]

Epoch: 1, Loss: 3.517254114151001


Processing epoch 00:  63%|██████▎   | 3055/4850 [14:00<08:18,  3.60it/s]

Epoch: 1, Loss: 3.5240345001220703


Processing epoch 00:  63%|██████▎   | 3056/4850 [14:00<08:17,  3.61it/s]

Epoch: 1, Loss: 3.5758745670318604


Processing epoch 00:  63%|██████▎   | 3057/4850 [14:00<08:15,  3.62it/s]

Epoch: 1, Loss: 3.9419517517089844


Processing epoch 00:  63%|██████▎   | 3058/4850 [14:01<08:13,  3.63it/s]

Epoch: 1, Loss: 3.6921191215515137


Processing epoch 00:  63%|██████▎   | 3059/4850 [14:01<08:11,  3.64it/s]

Epoch: 1, Loss: 4.588492393493652


Processing epoch 00:  63%|██████▎   | 3060/4850 [14:01<08:09,  3.65it/s]

Epoch: 1, Loss: 4.377694606781006


Processing epoch 00:  63%|██████▎   | 3061/4850 [14:01<08:11,  3.64it/s]

Epoch: 1, Loss: 2.859358787536621


Processing epoch 00:  63%|██████▎   | 3062/4850 [14:02<08:11,  3.64it/s]

Epoch: 1, Loss: 3.4344735145568848


Processing epoch 00:  63%|██████▎   | 3063/4850 [14:02<08:11,  3.64it/s]

Epoch: 1, Loss: 3.359067440032959


Processing epoch 00:  63%|██████▎   | 3064/4850 [14:02<08:07,  3.66it/s]

Epoch: 1, Loss: 4.350534439086914


Processing epoch 00:  63%|██████▎   | 3065/4850 [14:03<08:06,  3.67it/s]

Epoch: 1, Loss: 3.5404505729675293


Processing epoch 00:  63%|██████▎   | 3066/4850 [14:03<08:06,  3.66it/s]

Epoch: 1, Loss: 4.594260215759277


Processing epoch 00:  63%|██████▎   | 3067/4850 [14:03<08:07,  3.66it/s]

Epoch: 1, Loss: 3.9089906215667725


Processing epoch 00:  63%|██████▎   | 3068/4850 [14:03<08:08,  3.64it/s]

Epoch: 1, Loss: 2.8457112312316895


Processing epoch 00:  63%|██████▎   | 3069/4850 [14:04<08:11,  3.62it/s]

Epoch: 1, Loss: 4.638553142547607


Processing epoch 00:  63%|██████▎   | 3070/4850 [14:04<08:10,  3.63it/s]

Epoch: 1, Loss: 3.2304673194885254


Processing epoch 00:  63%|██████▎   | 3071/4850 [14:04<08:10,  3.63it/s]

Epoch: 1, Loss: 3.4985275268554688


Processing epoch 00:  63%|██████▎   | 3072/4850 [14:04<08:09,  3.63it/s]

Epoch: 1, Loss: 3.42522931098938


Processing epoch 00:  63%|██████▎   | 3073/4850 [14:05<08:07,  3.65it/s]

Epoch: 1, Loss: 3.706376552581787


Processing epoch 00:  63%|██████▎   | 3074/4850 [14:05<08:07,  3.64it/s]

Epoch: 1, Loss: 4.232400894165039


Processing epoch 00:  63%|██████▎   | 3075/4850 [14:05<08:06,  3.65it/s]

Epoch: 1, Loss: 3.2379212379455566


Processing epoch 00:  63%|██████▎   | 3076/4850 [14:06<08:05,  3.65it/s]

Epoch: 1, Loss: 3.335630416870117


Processing epoch 00:  63%|██████▎   | 3077/4850 [14:06<08:05,  3.65it/s]

Epoch: 1, Loss: 3.873751640319824


Processing epoch 00:  63%|██████▎   | 3078/4850 [14:06<08:05,  3.65it/s]

Epoch: 1, Loss: 3.1212551593780518


Processing epoch 00:  63%|██████▎   | 3079/4850 [14:06<08:06,  3.64it/s]

Epoch: 1, Loss: 3.1274490356445312


Processing epoch 00:  64%|██████▎   | 3080/4850 [14:07<08:06,  3.64it/s]

Epoch: 1, Loss: 2.898946523666382


Processing epoch 00:  64%|██████▎   | 3081/4850 [14:07<08:05,  3.65it/s]

Epoch: 1, Loss: 3.170738697052002


Processing epoch 00:  64%|██████▎   | 3082/4850 [14:07<08:06,  3.64it/s]

Epoch: 1, Loss: 2.900007963180542


Processing epoch 00:  64%|██████▎   | 3083/4850 [14:07<08:03,  3.65it/s]

Epoch: 1, Loss: 3.903778314590454


Processing epoch 00:  64%|██████▎   | 3084/4850 [14:08<08:20,  3.53it/s]

Epoch: 1, Loss: 3.7243003845214844


Processing epoch 00:  64%|██████▎   | 3085/4850 [14:08<08:24,  3.50it/s]

Epoch: 1, Loss: 4.027009010314941


Processing epoch 00:  64%|██████▎   | 3086/4850 [14:08<08:22,  3.51it/s]

Epoch: 1, Loss: 2.998384475708008


Processing epoch 00:  64%|██████▎   | 3087/4850 [14:09<08:26,  3.48it/s]

Epoch: 1, Loss: 3.4606828689575195


Processing epoch 00:  64%|██████▎   | 3088/4850 [14:09<08:27,  3.47it/s]

Epoch: 1, Loss: 3.212401866912842


Processing epoch 00:  64%|██████▎   | 3089/4850 [14:09<08:27,  3.47it/s]

Epoch: 1, Loss: 3.6380679607391357


Processing epoch 00:  64%|██████▎   | 3090/4850 [14:10<08:31,  3.44it/s]

Epoch: 1, Loss: 3.7217674255371094


Processing epoch 00:  64%|██████▎   | 3091/4850 [14:10<08:34,  3.42it/s]

Epoch: 1, Loss: 2.9708828926086426


Processing epoch 00:  64%|██████▍   | 3092/4850 [14:10<08:34,  3.42it/s]

Epoch: 1, Loss: 3.7011911869049072


Processing epoch 00:  64%|██████▍   | 3093/4850 [14:10<08:31,  3.43it/s]

Epoch: 1, Loss: 2.664745807647705


Processing epoch 00:  64%|██████▍   | 3094/4850 [14:11<08:32,  3.42it/s]

Epoch: 1, Loss: 4.577126502990723


Processing epoch 00:  64%|██████▍   | 3095/4850 [14:11<08:27,  3.46it/s]

Epoch: 1, Loss: 3.067990779876709


Processing epoch 00:  64%|██████▍   | 3096/4850 [14:11<08:18,  3.52it/s]

Epoch: 1, Loss: 3.264859676361084


Processing epoch 00:  64%|██████▍   | 3097/4850 [14:12<08:15,  3.54it/s]

Epoch: 1, Loss: 3.5398478507995605


Processing epoch 00:  64%|██████▍   | 3098/4850 [14:12<08:09,  3.58it/s]

Epoch: 1, Loss: 3.029698371887207


Processing epoch 00:  64%|██████▍   | 3099/4850 [14:12<08:10,  3.57it/s]

Epoch: 1, Loss: 3.048534870147705


Processing epoch 00:  64%|██████▍   | 3100/4850 [14:12<08:07,  3.59it/s]

Epoch: 1, Loss: 2.909125804901123


Processing epoch 00:  64%|██████▍   | 3101/4850 [14:13<08:04,  3.61it/s]

Epoch: 1, Loss: 3.3024117946624756


Processing epoch 00:  64%|██████▍   | 3102/4850 [14:13<08:02,  3.62it/s]

Epoch: 1, Loss: 3.2544236183166504


Processing epoch 00:  64%|██████▍   | 3103/4850 [14:13<07:59,  3.64it/s]

Epoch: 1, Loss: 4.116400718688965


Processing epoch 00:  64%|██████▍   | 3104/4850 [14:13<07:58,  3.65it/s]

Epoch: 1, Loss: 3.5985970497131348


Processing epoch 00:  64%|██████▍   | 3105/4850 [14:14<08:00,  3.63it/s]

Epoch: 1, Loss: 3.3415136337280273


Processing epoch 00:  64%|██████▍   | 3106/4850 [14:14<08:03,  3.61it/s]

Epoch: 1, Loss: 3.376704692840576


Processing epoch 00:  64%|██████▍   | 3107/4850 [14:14<08:00,  3.63it/s]

Epoch: 1, Loss: 3.4832029342651367


Processing epoch 00:  64%|██████▍   | 3108/4850 [14:15<07:59,  3.64it/s]

Epoch: 1, Loss: 3.2784433364868164


Processing epoch 00:  64%|██████▍   | 3109/4850 [14:15<07:58,  3.64it/s]

Epoch: 1, Loss: 3.2655081748962402


Processing epoch 00:  64%|██████▍   | 3110/4850 [14:15<07:57,  3.64it/s]

Epoch: 1, Loss: 3.197155475616455


Processing epoch 00:  64%|██████▍   | 3111/4850 [14:15<07:54,  3.66it/s]

Epoch: 1, Loss: 5.205300807952881


Processing epoch 00:  64%|██████▍   | 3112/4850 [14:16<07:50,  3.69it/s]

Epoch: 1, Loss: 4.351320743560791


Processing epoch 00:  64%|██████▍   | 3113/4850 [14:16<07:54,  3.66it/s]

Epoch: 1, Loss: 2.6814093589782715


Processing epoch 00:  64%|██████▍   | 3114/4850 [14:16<07:55,  3.65it/s]

Epoch: 1, Loss: 2.787360668182373


Processing epoch 00:  64%|██████▍   | 3115/4850 [14:16<07:54,  3.65it/s]

Epoch: 1, Loss: 3.52114200592041


Processing epoch 00:  64%|██████▍   | 3116/4850 [14:17<07:55,  3.64it/s]

Epoch: 1, Loss: 3.1999785900115967


Processing epoch 00:  64%|██████▍   | 3117/4850 [14:17<07:57,  3.63it/s]

Epoch: 1, Loss: 3.1759748458862305


Processing epoch 00:  64%|██████▍   | 3118/4850 [14:17<07:56,  3.63it/s]

Epoch: 1, Loss: 3.2084431648254395


Processing epoch 00:  64%|██████▍   | 3119/4850 [14:18<07:55,  3.64it/s]

Epoch: 1, Loss: 3.6087260246276855


Processing epoch 00:  64%|██████▍   | 3120/4850 [14:18<07:55,  3.64it/s]

Epoch: 1, Loss: 2.5427045822143555


Processing epoch 00:  64%|██████▍   | 3121/4850 [14:18<08:06,  3.56it/s]

Epoch: 1, Loss: 3.430300712585449


Processing epoch 00:  64%|██████▍   | 3122/4850 [14:18<08:03,  3.57it/s]

Epoch: 1, Loss: 2.639742851257324


Processing epoch 00:  64%|██████▍   | 3123/4850 [14:19<08:00,  3.60it/s]

Epoch: 1, Loss: 3.1585328578948975


Processing epoch 00:  64%|██████▍   | 3124/4850 [14:19<07:56,  3.62it/s]

Epoch: 1, Loss: 4.93511962890625


Processing epoch 00:  64%|██████▍   | 3125/4850 [14:19<07:55,  3.63it/s]

Epoch: 1, Loss: 2.8590505123138428


Processing epoch 00:  64%|██████▍   | 3126/4850 [14:20<07:55,  3.63it/s]

Epoch: 1, Loss: 2.700082778930664


Processing epoch 00:  64%|██████▍   | 3127/4850 [14:20<07:54,  3.63it/s]

Epoch: 1, Loss: 3.6136295795440674


Processing epoch 00:  64%|██████▍   | 3128/4850 [14:20<07:55,  3.62it/s]

Epoch: 1, Loss: 3.2114334106445312


Processing epoch 00:  65%|██████▍   | 3129/4850 [14:20<07:53,  3.64it/s]

Epoch: 1, Loss: 3.4806065559387207


Processing epoch 00:  65%|██████▍   | 3130/4850 [14:21<07:52,  3.64it/s]

Epoch: 1, Loss: 3.2076544761657715


Processing epoch 00:  65%|██████▍   | 3131/4850 [14:21<07:52,  3.64it/s]

Epoch: 1, Loss: 2.591538906097412


Processing epoch 00:  65%|██████▍   | 3132/4850 [14:21<08:00,  3.58it/s]

Epoch: 1, Loss: 3.0130560398101807


Processing epoch 00:  65%|██████▍   | 3133/4850 [14:21<08:04,  3.54it/s]

Epoch: 1, Loss: 4.445649147033691


Processing epoch 00:  65%|██████▍   | 3134/4850 [14:22<08:04,  3.54it/s]

Epoch: 1, Loss: 3.2498531341552734


Processing epoch 00:  65%|██████▍   | 3135/4850 [14:22<08:11,  3.49it/s]

Epoch: 1, Loss: 3.569173812866211


Processing epoch 00:  65%|██████▍   | 3136/4850 [14:22<08:13,  3.47it/s]

Epoch: 1, Loss: 3.06490421295166


Processing epoch 00:  65%|██████▍   | 3137/4850 [14:23<08:10,  3.49it/s]

Epoch: 1, Loss: 3.15574049949646


Processing epoch 00:  65%|██████▍   | 3138/4850 [14:23<08:15,  3.46it/s]

Epoch: 1, Loss: 2.527984619140625


Processing epoch 00:  65%|██████▍   | 3139/4850 [14:23<08:23,  3.40it/s]

Epoch: 1, Loss: 3.3796186447143555


Processing epoch 00:  65%|██████▍   | 3140/4850 [14:24<08:23,  3.40it/s]

Epoch: 1, Loss: 4.16373348236084


Processing epoch 00:  65%|██████▍   | 3141/4850 [14:24<08:27,  3.37it/s]

Epoch: 1, Loss: 3.124253273010254


Processing epoch 00:  65%|██████▍   | 3142/4850 [14:24<08:25,  3.38it/s]

Epoch: 1, Loss: 2.9906363487243652


Processing epoch 00:  65%|██████▍   | 3143/4850 [14:24<08:17,  3.43it/s]

Epoch: 1, Loss: 3.2901854515075684


Processing epoch 00:  65%|██████▍   | 3144/4850 [14:25<08:07,  3.50it/s]

Epoch: 1, Loss: 3.304205894470215


Processing epoch 00:  65%|██████▍   | 3145/4850 [14:25<08:01,  3.54it/s]

Epoch: 1, Loss: 3.647819995880127


Processing epoch 00:  65%|██████▍   | 3146/4850 [14:25<07:58,  3.56it/s]

Epoch: 1, Loss: 3.4210824966430664


Processing epoch 00:  65%|██████▍   | 3147/4850 [14:26<07:54,  3.59it/s]

Epoch: 1, Loss: 4.603045463562012


Processing epoch 00:  65%|██████▍   | 3148/4850 [14:26<07:52,  3.60it/s]

Epoch: 1, Loss: 3.3971898555755615


Processing epoch 00:  65%|██████▍   | 3149/4850 [14:26<07:49,  3.62it/s]

Epoch: 1, Loss: 5.189438819885254


Processing epoch 00:  65%|██████▍   | 3150/4850 [14:26<07:47,  3.64it/s]

Epoch: 1, Loss: 3.126918315887451


Processing epoch 00:  65%|██████▍   | 3151/4850 [14:27<07:47,  3.63it/s]

Epoch: 1, Loss: 3.2629640102386475


Processing epoch 00:  65%|██████▍   | 3152/4850 [14:27<07:47,  3.63it/s]

Epoch: 1, Loss: 3.11435604095459


Processing epoch 00:  65%|██████▌   | 3153/4850 [14:27<07:46,  3.64it/s]

Epoch: 1, Loss: 3.0817668437957764


Processing epoch 00:  65%|██████▌   | 3154/4850 [14:27<07:46,  3.64it/s]

Epoch: 1, Loss: 3.1587069034576416


Processing epoch 00:  65%|██████▌   | 3155/4850 [14:28<07:43,  3.65it/s]

Epoch: 1, Loss: 2.4961769580841064


Processing epoch 00:  65%|██████▌   | 3156/4850 [14:28<07:43,  3.66it/s]

Epoch: 1, Loss: 4.029261589050293


Processing epoch 00:  65%|██████▌   | 3157/4850 [14:28<07:41,  3.67it/s]

Epoch: 1, Loss: 3.9218592643737793


Processing epoch 00:  65%|██████▌   | 3158/4850 [14:29<07:46,  3.63it/s]

Epoch: 1, Loss: 3.8094475269317627


Processing epoch 00:  65%|██████▌   | 3159/4850 [14:29<07:44,  3.64it/s]

Epoch: 1, Loss: 3.6773440837860107


Processing epoch 00:  65%|██████▌   | 3160/4850 [14:29<07:44,  3.64it/s]

Epoch: 1, Loss: 3.384492874145508


Processing epoch 00:  65%|██████▌   | 3161/4850 [14:29<07:43,  3.64it/s]

Epoch: 1, Loss: 2.9132261276245117


Processing epoch 00:  65%|██████▌   | 3162/4850 [14:30<07:43,  3.65it/s]

Epoch: 1, Loss: 3.272576332092285


Processing epoch 00:  65%|██████▌   | 3163/4850 [14:30<07:44,  3.63it/s]

Epoch: 1, Loss: 3.2286906242370605


Processing epoch 00:  65%|██████▌   | 3164/4850 [14:30<07:42,  3.65it/s]

Epoch: 1, Loss: 3.743727922439575


Processing epoch 00:  65%|██████▌   | 3165/4850 [14:30<07:43,  3.64it/s]

Epoch: 1, Loss: 3.4359121322631836


Processing epoch 00:  65%|██████▌   | 3166/4850 [14:31<07:41,  3.65it/s]

Epoch: 1, Loss: 4.12524938583374


Processing epoch 00:  65%|██████▌   | 3167/4850 [14:31<07:43,  3.63it/s]

Epoch: 1, Loss: 2.426072597503662


Processing epoch 00:  65%|██████▌   | 3168/4850 [14:31<07:43,  3.63it/s]

Epoch: 1, Loss: 3.894467830657959


Processing epoch 00:  65%|██████▌   | 3169/4850 [14:32<07:41,  3.64it/s]

Epoch: 1, Loss: 2.9040656089782715


Processing epoch 00:  65%|██████▌   | 3170/4850 [14:32<07:42,  3.63it/s]

Epoch: 1, Loss: 2.8067219257354736


Processing epoch 00:  65%|██████▌   | 3171/4850 [14:32<07:42,  3.63it/s]

Epoch: 1, Loss: 2.788628101348877


Processing epoch 00:  65%|██████▌   | 3172/4850 [14:32<07:42,  3.63it/s]

Epoch: 1, Loss: 3.7772927284240723


Processing epoch 00:  65%|██████▌   | 3173/4850 [14:33<07:46,  3.59it/s]

Epoch: 1, Loss: 3.0543015003204346


Processing epoch 00:  65%|██████▌   | 3174/4850 [14:33<07:45,  3.60it/s]

Epoch: 1, Loss: 3.0566110610961914


Processing epoch 00:  65%|██████▌   | 3175/4850 [14:33<07:44,  3.60it/s]

Epoch: 1, Loss: 3.597499370574951


Processing epoch 00:  65%|██████▌   | 3176/4850 [14:33<07:45,  3.59it/s]

Epoch: 1, Loss: 3.019009828567505


Processing epoch 00:  66%|██████▌   | 3177/4850 [14:34<07:44,  3.60it/s]

Epoch: 1, Loss: 3.296661853790283


Processing epoch 00:  66%|██████▌   | 3178/4850 [14:34<07:44,  3.60it/s]

Epoch: 1, Loss: 3.1853694915771484


Processing epoch 00:  66%|██████▌   | 3179/4850 [14:34<07:49,  3.56it/s]

Epoch: 1, Loss: 2.772155284881592


Processing epoch 00:  66%|██████▌   | 3180/4850 [14:35<07:50,  3.55it/s]

Epoch: 1, Loss: 3.9829044342041016


Processing epoch 00:  66%|██████▌   | 3181/4850 [14:35<07:54,  3.52it/s]

Epoch: 1, Loss: 2.8499488830566406


Processing epoch 00:  66%|██████▌   | 3182/4850 [14:35<07:52,  3.53it/s]

Epoch: 1, Loss: 2.442451238632202


Processing epoch 00:  66%|██████▌   | 3183/4850 [14:35<07:58,  3.49it/s]

Epoch: 1, Loss: 3.5891876220703125


Processing epoch 00:  66%|██████▌   | 3184/4850 [14:36<08:06,  3.42it/s]

Epoch: 1, Loss: 3.301363945007324


Processing epoch 00:  66%|██████▌   | 3185/4850 [14:36<08:10,  3.40it/s]

Epoch: 1, Loss: 3.103069305419922


Processing epoch 00:  66%|██████▌   | 3186/4850 [14:36<08:10,  3.39it/s]

Epoch: 1, Loss: 3.5150485038757324


Processing epoch 00:  66%|██████▌   | 3187/4850 [14:37<08:14,  3.36it/s]

Epoch: 1, Loss: 3.317135810852051


Processing epoch 00:  66%|██████▌   | 3188/4850 [14:37<08:09,  3.40it/s]

Epoch: 1, Loss: 4.419523239135742


Processing epoch 00:  66%|██████▌   | 3189/4850 [14:37<08:19,  3.33it/s]

Epoch: 1, Loss: 3.1931655406951904


Processing epoch 00:  66%|██████▌   | 3190/4850 [14:38<08:04,  3.43it/s]

Epoch: 1, Loss: 2.8450582027435303


Processing epoch 00:  66%|██████▌   | 3191/4850 [14:38<07:56,  3.48it/s]

Epoch: 1, Loss: 3.1583104133605957


Processing epoch 00:  66%|██████▌   | 3192/4850 [14:38<07:50,  3.53it/s]

Epoch: 1, Loss: 3.666337013244629


Processing epoch 00:  66%|██████▌   | 3193/4850 [14:38<07:46,  3.55it/s]

Epoch: 1, Loss: 3.284127712249756


Processing epoch 00:  66%|██████▌   | 3194/4850 [14:39<07:51,  3.51it/s]

Epoch: 1, Loss: 4.509662628173828


Processing epoch 00:  66%|██████▌   | 3195/4850 [14:39<07:45,  3.55it/s]

Epoch: 1, Loss: 3.116149425506592


Processing epoch 00:  66%|██████▌   | 3196/4850 [14:39<07:40,  3.59it/s]

Epoch: 1, Loss: 4.328005313873291


Processing epoch 00:  66%|██████▌   | 3197/4850 [14:40<07:38,  3.61it/s]

Epoch: 1, Loss: 3.672013998031616


Processing epoch 00:  66%|██████▌   | 3198/4850 [14:40<07:35,  3.62it/s]

Epoch: 1, Loss: 3.5794548988342285


Processing epoch 00:  66%|██████▌   | 3199/4850 [14:40<07:40,  3.59it/s]

Epoch: 1, Loss: 3.362999200820923


Processing epoch 00:  66%|██████▌   | 3200/4850 [14:40<07:36,  3.61it/s]

Epoch: 1, Loss: 3.037644863128662


Processing epoch 00:  66%|██████▌   | 3201/4850 [14:41<07:36,  3.61it/s]

Epoch: 1, Loss: 3.733004093170166


Processing epoch 00:  66%|██████▌   | 3202/4850 [14:41<07:35,  3.62it/s]

Epoch: 1, Loss: 3.1679694652557373


Processing epoch 00:  66%|██████▌   | 3203/4850 [14:41<07:34,  3.63it/s]

Epoch: 1, Loss: 2.8499491214752197


Processing epoch 00:  66%|██████▌   | 3204/4850 [14:41<07:32,  3.64it/s]

Epoch: 1, Loss: 2.593808174133301


Processing epoch 00:  66%|██████▌   | 3205/4850 [14:42<07:33,  3.63it/s]

Epoch: 1, Loss: 3.7298812866210938


Processing epoch 00:  66%|██████▌   | 3206/4850 [14:42<07:34,  3.61it/s]

Epoch: 1, Loss: 4.0286455154418945


Processing epoch 00:  66%|██████▌   | 3207/4850 [14:42<07:31,  3.64it/s]

Epoch: 1, Loss: 3.860804557800293


Processing epoch 00:  66%|██████▌   | 3208/4850 [14:43<07:30,  3.65it/s]

Epoch: 1, Loss: 4.739439487457275


Processing epoch 00:  66%|██████▌   | 3209/4850 [14:43<07:30,  3.65it/s]

Epoch: 1, Loss: 4.5665388107299805


Processing epoch 00:  66%|██████▌   | 3210/4850 [14:43<07:30,  3.64it/s]

Epoch: 1, Loss: 3.562730312347412


Processing epoch 00:  66%|██████▌   | 3211/4850 [14:43<07:27,  3.66it/s]

Epoch: 1, Loss: 4.043565273284912


Processing epoch 00:  66%|██████▌   | 3212/4850 [14:44<07:27,  3.66it/s]

Epoch: 1, Loss: 3.451272487640381


Processing epoch 00:  66%|██████▌   | 3213/4850 [14:44<07:25,  3.67it/s]

Epoch: 1, Loss: 4.691742420196533


Processing epoch 00:  66%|██████▋   | 3214/4850 [14:44<07:27,  3.66it/s]

Epoch: 1, Loss: 3.0495336055755615


Processing epoch 00:  66%|██████▋   | 3215/4850 [14:44<07:26,  3.66it/s]

Epoch: 1, Loss: 3.796299934387207


Processing epoch 00:  66%|██████▋   | 3216/4850 [14:45<07:29,  3.64it/s]

Epoch: 1, Loss: 3.103607654571533


Processing epoch 00:  66%|██████▋   | 3217/4850 [14:45<07:29,  3.63it/s]

Epoch: 1, Loss: 3.1638002395629883


Processing epoch 00:  66%|██████▋   | 3218/4850 [14:45<07:28,  3.64it/s]

Epoch: 1, Loss: 3.5961084365844727


Processing epoch 00:  66%|██████▋   | 3219/4850 [14:46<07:29,  3.63it/s]

Epoch: 1, Loss: 3.8004071712493896


Processing epoch 00:  66%|██████▋   | 3220/4850 [14:46<07:28,  3.64it/s]

Epoch: 1, Loss: 3.1024136543273926


Processing epoch 00:  66%|██████▋   | 3221/4850 [14:46<07:30,  3.62it/s]

Epoch: 1, Loss: 3.4217112064361572


Processing epoch 00:  66%|██████▋   | 3222/4850 [14:46<07:27,  3.64it/s]

Epoch: 1, Loss: 3.493053436279297


Processing epoch 00:  66%|██████▋   | 3223/4850 [14:47<07:26,  3.64it/s]

Epoch: 1, Loss: 3.0658063888549805


Processing epoch 00:  66%|██████▋   | 3224/4850 [14:47<07:26,  3.65it/s]

Epoch: 1, Loss: 3.307307243347168


Processing epoch 00:  66%|██████▋   | 3225/4850 [14:47<07:28,  3.62it/s]

Epoch: 1, Loss: 2.74513578414917


Processing epoch 00:  67%|██████▋   | 3226/4850 [14:47<07:34,  3.57it/s]

Epoch: 1, Loss: 3.1450700759887695


Processing epoch 00:  67%|██████▋   | 3227/4850 [14:48<07:38,  3.54it/s]

Epoch: 1, Loss: 3.538792848587036


Processing epoch 00:  67%|██████▋   | 3228/4850 [14:48<07:39,  3.53it/s]

Epoch: 1, Loss: 4.446961402893066


Processing epoch 00:  67%|██████▋   | 3229/4850 [14:48<07:37,  3.54it/s]

Epoch: 1, Loss: 2.7779078483581543


Processing epoch 00:  67%|██████▋   | 3230/4850 [14:49<07:41,  3.51it/s]

Epoch: 1, Loss: 3.3621344566345215


Processing epoch 00:  67%|██████▋   | 3231/4850 [14:49<07:44,  3.48it/s]

Epoch: 1, Loss: 4.704676151275635


Processing epoch 00:  67%|██████▋   | 3232/4850 [14:49<07:49,  3.45it/s]

Epoch: 1, Loss: 3.558157444000244


Processing epoch 00:  67%|██████▋   | 3233/4850 [14:50<07:46,  3.47it/s]

Epoch: 1, Loss: 3.0679283142089844


Processing epoch 00:  67%|██████▋   | 3234/4850 [14:50<07:52,  3.42it/s]

Epoch: 1, Loss: 3.027529001235962


Processing epoch 00:  67%|██████▋   | 3235/4850 [14:50<07:53,  3.41it/s]

Epoch: 1, Loss: 3.0561628341674805


Processing epoch 00:  67%|██████▋   | 3236/4850 [14:50<07:53,  3.41it/s]

Epoch: 1, Loss: 2.8273439407348633


Processing epoch 00:  67%|██████▋   | 3237/4850 [14:51<07:45,  3.47it/s]

Epoch: 1, Loss: 4.185641765594482


Processing epoch 00:  67%|██████▋   | 3238/4850 [14:51<07:36,  3.53it/s]

Epoch: 1, Loss: 3.4387388229370117


Processing epoch 00:  67%|██████▋   | 3239/4850 [14:51<07:32,  3.56it/s]

Epoch: 1, Loss: 3.5466158390045166


Processing epoch 00:  67%|██████▋   | 3240/4850 [14:52<07:28,  3.59it/s]

Epoch: 1, Loss: 2.9634218215942383


Processing epoch 00:  67%|██████▋   | 3241/4850 [14:52<07:24,  3.62it/s]

Epoch: 1, Loss: 3.280824661254883


Processing epoch 00:  67%|██████▋   | 3242/4850 [14:52<07:23,  3.62it/s]

Epoch: 1, Loss: 2.9975316524505615


Processing epoch 00:  67%|██████▋   | 3243/4850 [14:52<07:24,  3.61it/s]

Epoch: 1, Loss: 4.01051139831543


Processing epoch 00:  67%|██████▋   | 3244/4850 [14:53<07:23,  3.62it/s]

Epoch: 1, Loss: 2.7057290077209473


Processing epoch 00:  67%|██████▋   | 3245/4850 [14:53<07:22,  3.63it/s]

Epoch: 1, Loss: 3.0297820568084717


Processing epoch 00:  67%|██████▋   | 3246/4850 [14:53<07:21,  3.63it/s]

Epoch: 1, Loss: 3.103182315826416


Processing epoch 00:  67%|██████▋   | 3247/4850 [14:53<07:19,  3.65it/s]

Epoch: 1, Loss: 2.823333263397217


Processing epoch 00:  67%|██████▋   | 3248/4850 [14:54<07:16,  3.67it/s]

Epoch: 1, Loss: 4.830868721008301


Processing epoch 00:  67%|██████▋   | 3249/4850 [14:54<07:17,  3.66it/s]

Epoch: 1, Loss: 2.9579858779907227


Processing epoch 00:  67%|██████▋   | 3250/4850 [14:54<07:15,  3.67it/s]

Epoch: 1, Loss: 3.5050437450408936


Processing epoch 00:  67%|██████▋   | 3251/4850 [14:55<07:17,  3.65it/s]

Epoch: 1, Loss: 2.745695114135742


Processing epoch 00:  67%|██████▋   | 3252/4850 [14:55<07:16,  3.66it/s]

Epoch: 1, Loss: 3.7397103309631348


Processing epoch 00:  67%|██████▋   | 3253/4850 [14:55<07:16,  3.66it/s]

Epoch: 1, Loss: 3.389620304107666


Processing epoch 00:  67%|██████▋   | 3254/4850 [14:55<07:15,  3.66it/s]

Epoch: 1, Loss: 3.5741286277770996


Processing epoch 00:  67%|██████▋   | 3255/4850 [14:56<07:16,  3.66it/s]

Epoch: 1, Loss: 3.0942203998565674


Processing epoch 00:  67%|██████▋   | 3256/4850 [14:56<07:14,  3.67it/s]

Epoch: 1, Loss: 4.013075828552246


Processing epoch 00:  67%|██████▋   | 3257/4850 [14:56<07:15,  3.66it/s]

Epoch: 1, Loss: 3.629082679748535


Processing epoch 00:  67%|██████▋   | 3258/4850 [14:56<07:20,  3.61it/s]

Epoch: 1, Loss: 3.4279861450195312


Processing epoch 00:  67%|██████▋   | 3259/4850 [14:57<07:18,  3.63it/s]

Epoch: 1, Loss: 3.273806571960449


Processing epoch 00:  67%|██████▋   | 3260/4850 [14:57<07:16,  3.64it/s]

Epoch: 1, Loss: 3.3403468132019043


Processing epoch 00:  67%|██████▋   | 3261/4850 [14:57<07:16,  3.64it/s]

Epoch: 1, Loss: 2.7108373641967773


Processing epoch 00:  67%|██████▋   | 3262/4850 [14:58<07:15,  3.64it/s]

Epoch: 1, Loss: 3.8431193828582764


Processing epoch 00:  67%|██████▋   | 3263/4850 [14:58<07:15,  3.64it/s]

Epoch: 1, Loss: 2.988399028778076


Processing epoch 00:  67%|██████▋   | 3264/4850 [14:58<07:16,  3.64it/s]

Epoch: 1, Loss: 4.927137851715088


Processing epoch 00:  67%|██████▋   | 3265/4850 [14:58<07:14,  3.65it/s]

Epoch: 1, Loss: 3.452474594116211


Processing epoch 00:  67%|██████▋   | 3266/4850 [14:59<07:12,  3.66it/s]

Epoch: 1, Loss: 3.467543840408325


Processing epoch 00:  67%|██████▋   | 3267/4850 [14:59<07:12,  3.66it/s]

Epoch: 1, Loss: 4.659607887268066


Processing epoch 00:  67%|██████▋   | 3268/4850 [14:59<07:11,  3.66it/s]

Epoch: 1, Loss: 4.096289157867432


Processing epoch 00:  67%|██████▋   | 3269/4850 [14:59<07:18,  3.61it/s]

Epoch: 1, Loss: 2.7910752296447754


Processing epoch 00:  67%|██████▋   | 3270/4850 [15:00<07:19,  3.59it/s]

Epoch: 1, Loss: 2.31400203704834


Processing epoch 00:  67%|██████▋   | 3271/4850 [15:00<07:18,  3.60it/s]

Epoch: 1, Loss: 3.3319437503814697


Processing epoch 00:  67%|██████▋   | 3272/4850 [15:00<07:16,  3.61it/s]

Epoch: 1, Loss: 2.876354694366455


Processing epoch 00:  67%|██████▋   | 3273/4850 [15:01<07:24,  3.55it/s]

Epoch: 1, Loss: 2.598410129547119


Processing epoch 00:  68%|██████▊   | 3274/4850 [15:01<07:31,  3.49it/s]

Epoch: 1, Loss: 4.183028697967529


Processing epoch 00:  68%|██████▊   | 3275/4850 [15:01<07:37,  3.44it/s]

Epoch: 1, Loss: 3.2608022689819336


Processing epoch 00:  68%|██████▊   | 3276/4850 [15:01<07:41,  3.41it/s]

Epoch: 1, Loss: 3.5491271018981934


Processing epoch 00:  68%|██████▊   | 3277/4850 [15:02<07:44,  3.38it/s]

Epoch: 1, Loss: 3.2436728477478027


Processing epoch 00:  68%|██████▊   | 3278/4850 [15:02<07:41,  3.41it/s]

Epoch: 1, Loss: 3.3929390907287598


Processing epoch 00:  68%|██████▊   | 3279/4850 [15:02<07:40,  3.41it/s]

Epoch: 1, Loss: 2.931678295135498


Processing epoch 00:  68%|██████▊   | 3280/4850 [15:03<07:43,  3.39it/s]

Epoch: 1, Loss: 2.8280460834503174


Processing epoch 00:  68%|██████▊   | 3281/4850 [15:03<07:40,  3.40it/s]

Epoch: 1, Loss: 3.4726386070251465


Processing epoch 00:  68%|██████▊   | 3282/4850 [15:03<07:35,  3.45it/s]

Epoch: 1, Loss: 3.4291484355926514


Processing epoch 00:  68%|██████▊   | 3283/4850 [15:04<07:38,  3.42it/s]

Epoch: 1, Loss: 3.053344488143921


Processing epoch 00:  68%|██████▊   | 3284/4850 [15:04<07:38,  3.41it/s]

Epoch: 1, Loss: 2.9275896549224854


Processing epoch 00:  68%|██████▊   | 3285/4850 [15:04<07:30,  3.48it/s]

Epoch: 1, Loss: 3.256765127182007


Processing epoch 00:  68%|██████▊   | 3286/4850 [15:04<07:21,  3.54it/s]

Epoch: 1, Loss: 3.9223408699035645


Processing epoch 00:  68%|██████▊   | 3287/4850 [15:05<07:17,  3.57it/s]

Epoch: 1, Loss: 2.386399269104004


Processing epoch 00:  68%|██████▊   | 3288/4850 [15:05<07:14,  3.60it/s]

Epoch: 1, Loss: 3.4172253608703613


Processing epoch 00:  68%|██████▊   | 3289/4850 [15:05<07:12,  3.61it/s]

Epoch: 1, Loss: 3.0292458534240723


Processing epoch 00:  68%|██████▊   | 3290/4850 [15:05<07:10,  3.62it/s]

Epoch: 1, Loss: 3.0206050872802734


Processing epoch 00:  68%|██████▊   | 3291/4850 [15:06<07:15,  3.58it/s]

Epoch: 1, Loss: 3.153599262237549


Processing epoch 00:  68%|██████▊   | 3292/4850 [15:06<07:15,  3.58it/s]

Epoch: 1, Loss: 3.363779306411743


Processing epoch 00:  68%|██████▊   | 3293/4850 [15:06<07:12,  3.60it/s]

Epoch: 1, Loss: 3.911931276321411


Processing epoch 00:  68%|██████▊   | 3294/4850 [15:07<07:10,  3.61it/s]

Epoch: 1, Loss: 3.201620101928711


Processing epoch 00:  68%|██████▊   | 3295/4850 [15:07<07:10,  3.61it/s]

Epoch: 1, Loss: 2.664978504180908


Processing epoch 00:  68%|██████▊   | 3296/4850 [15:07<07:09,  3.61it/s]

Epoch: 1, Loss: 3.1548702716827393


Processing epoch 00:  68%|██████▊   | 3297/4850 [15:07<07:05,  3.65it/s]

Epoch: 1, Loss: 4.079474449157715


Processing epoch 00:  68%|██████▊   | 3298/4850 [15:08<07:06,  3.64it/s]

Epoch: 1, Loss: 3.1036391258239746


Processing epoch 00:  68%|██████▊   | 3299/4850 [15:08<07:05,  3.65it/s]

Epoch: 1, Loss: 3.945629596710205


Processing epoch 00:  68%|██████▊   | 3300/4850 [15:08<07:05,  3.64it/s]

Epoch: 1, Loss: 3.2309701442718506


Processing epoch 00:  68%|██████▊   | 3301/4850 [15:09<07:05,  3.64it/s]

Epoch: 1, Loss: 4.217764854431152


Processing epoch 00:  68%|██████▊   | 3302/4850 [15:09<07:05,  3.63it/s]

Epoch: 1, Loss: 3.515688180923462


Processing epoch 00:  68%|██████▊   | 3303/4850 [15:09<07:06,  3.63it/s]

Epoch: 1, Loss: 2.839827299118042


Processing epoch 00:  68%|██████▊   | 3304/4850 [15:09<07:05,  3.63it/s]

Epoch: 1, Loss: 3.1747822761535645


Processing epoch 00:  68%|██████▊   | 3305/4850 [15:10<07:07,  3.62it/s]

Epoch: 1, Loss: 3.385639190673828


Processing epoch 00:  68%|██████▊   | 3306/4850 [15:10<07:08,  3.60it/s]

Epoch: 1, Loss: 3.537736415863037


Processing epoch 00:  68%|██████▊   | 3307/4850 [15:10<07:07,  3.61it/s]

Epoch: 1, Loss: 3.443653106689453


Processing epoch 00:  68%|██████▊   | 3308/4850 [15:10<07:06,  3.61it/s]

Epoch: 1, Loss: 3.367494821548462


Processing epoch 00:  68%|██████▊   | 3309/4850 [15:11<07:04,  3.63it/s]

Epoch: 1, Loss: 3.421311378479004


Processing epoch 00:  68%|██████▊   | 3310/4850 [15:11<07:03,  3.64it/s]

Epoch: 1, Loss: 3.8479862213134766


Processing epoch 00:  68%|██████▊   | 3311/4850 [15:11<07:02,  3.65it/s]

Epoch: 1, Loss: 3.1790144443511963


Processing epoch 00:  68%|██████▊   | 3312/4850 [15:12<07:01,  3.65it/s]

Epoch: 1, Loss: 3.138300895690918


Processing epoch 00:  68%|██████▊   | 3313/4850 [15:12<07:00,  3.65it/s]

Epoch: 1, Loss: 4.009992599487305


Processing epoch 00:  68%|██████▊   | 3314/4850 [15:12<07:01,  3.64it/s]

Epoch: 1, Loss: 3.6415364742279053


Processing epoch 00:  68%|██████▊   | 3315/4850 [15:12<07:01,  3.64it/s]

Epoch: 1, Loss: 3.1031527519226074


Processing epoch 00:  68%|██████▊   | 3316/4850 [15:13<07:00,  3.65it/s]

Epoch: 1, Loss: 2.921800136566162


Processing epoch 00:  68%|██████▊   | 3317/4850 [15:13<07:03,  3.62it/s]

Epoch: 1, Loss: 2.9591193199157715


Processing epoch 00:  68%|██████▊   | 3318/4850 [15:13<07:03,  3.62it/s]

Epoch: 1, Loss: 3.3482606410980225


Processing epoch 00:  68%|██████▊   | 3319/4850 [15:13<07:02,  3.63it/s]

Epoch: 1, Loss: 3.0036892890930176


Processing epoch 00:  68%|██████▊   | 3320/4850 [15:14<07:01,  3.63it/s]

Epoch: 1, Loss: 3.5164263248443604


Processing epoch 00:  68%|██████▊   | 3321/4850 [15:14<07:14,  3.52it/s]

Epoch: 1, Loss: 3.52797794342041


Processing epoch 00:  68%|██████▊   | 3322/4850 [15:14<07:13,  3.52it/s]

Epoch: 1, Loss: 2.7086472511291504


Processing epoch 00:  69%|██████▊   | 3323/4850 [15:15<07:13,  3.53it/s]

Epoch: 1, Loss: 3.151151657104492


Processing epoch 00:  69%|██████▊   | 3324/4850 [15:15<07:20,  3.47it/s]

Epoch: 1, Loss: 3.284698963165283


Processing epoch 00:  69%|██████▊   | 3325/4850 [15:15<07:16,  3.49it/s]

Epoch: 1, Loss: 3.7524876594543457


Processing epoch 00:  69%|██████▊   | 3326/4850 [15:15<07:20,  3.46it/s]

Epoch: 1, Loss: 4.143639087677002


Processing epoch 00:  69%|██████▊   | 3327/4850 [15:16<07:19,  3.46it/s]

Epoch: 1, Loss: 4.315008163452148


Processing epoch 00:  69%|██████▊   | 3328/4850 [15:16<07:25,  3.41it/s]

Epoch: 1, Loss: 4.3800764083862305


Processing epoch 00:  69%|██████▊   | 3329/4850 [15:16<07:26,  3.41it/s]

Epoch: 1, Loss: 2.915454864501953


Processing epoch 00:  69%|██████▊   | 3330/4850 [15:17<07:24,  3.42it/s]

Epoch: 1, Loss: 3.312216281890869


Processing epoch 00:  69%|██████▊   | 3331/4850 [15:17<07:25,  3.41it/s]

Epoch: 1, Loss: 3.458507537841797


Processing epoch 00:  69%|██████▊   | 3332/4850 [15:17<07:25,  3.41it/s]

Epoch: 1, Loss: 3.0264461040496826


Processing epoch 00:  69%|██████▊   | 3333/4850 [15:18<07:16,  3.47it/s]

Epoch: 1, Loss: 3.7022199630737305


Processing epoch 00:  69%|██████▊   | 3334/4850 [15:18<07:10,  3.52it/s]

Epoch: 1, Loss: 2.6162643432617188


Processing epoch 00:  69%|██████▉   | 3335/4850 [15:18<07:06,  3.55it/s]

Epoch: 1, Loss: 3.910661458969116


Processing epoch 00:  69%|██████▉   | 3336/4850 [15:18<07:03,  3.57it/s]

Epoch: 1, Loss: 3.6694436073303223


Processing epoch 00:  69%|██████▉   | 3337/4850 [15:19<07:02,  3.58it/s]

Epoch: 1, Loss: 3.119295120239258


Processing epoch 00:  69%|██████▉   | 3338/4850 [15:19<07:00,  3.60it/s]

Epoch: 1, Loss: 3.6459810733795166


Processing epoch 00:  69%|██████▉   | 3339/4850 [15:19<07:00,  3.59it/s]

Epoch: 1, Loss: 3.603394031524658


Processing epoch 00:  69%|██████▉   | 3340/4850 [15:19<06:59,  3.60it/s]

Epoch: 1, Loss: 3.168076992034912


Processing epoch 00:  69%|██████▉   | 3341/4850 [15:20<06:55,  3.63it/s]

Epoch: 1, Loss: 4.018023490905762


Processing epoch 00:  69%|██████▉   | 3342/4850 [15:20<07:04,  3.56it/s]

Epoch: 1, Loss: 3.876610279083252


Processing epoch 00:  69%|██████▉   | 3343/4850 [15:20<06:59,  3.59it/s]

Epoch: 1, Loss: 3.0214128494262695


Processing epoch 00:  69%|██████▉   | 3344/4850 [15:21<06:56,  3.61it/s]

Epoch: 1, Loss: 2.9823460578918457


Processing epoch 00:  69%|██████▉   | 3345/4850 [15:21<06:56,  3.61it/s]

Epoch: 1, Loss: 4.138893127441406


Processing epoch 00:  69%|██████▉   | 3346/4850 [15:21<06:55,  3.62it/s]

Epoch: 1, Loss: 4.111440658569336


Processing epoch 00:  69%|██████▉   | 3347/4850 [15:21<06:55,  3.62it/s]

Epoch: 1, Loss: 3.408231735229492


Processing epoch 00:  69%|██████▉   | 3348/4850 [15:22<06:54,  3.63it/s]

Epoch: 1, Loss: 4.510120391845703


Processing epoch 00:  69%|██████▉   | 3349/4850 [15:22<06:54,  3.62it/s]

Epoch: 1, Loss: 3.4166979789733887


Processing epoch 00:  69%|██████▉   | 3350/4850 [15:22<06:53,  3.63it/s]

Epoch: 1, Loss: 2.7912988662719727


Processing epoch 00:  69%|██████▉   | 3351/4850 [15:22<06:53,  3.62it/s]

Epoch: 1, Loss: 2.5822956562042236


Processing epoch 00:  69%|██████▉   | 3352/4850 [15:23<06:50,  3.65it/s]

Epoch: 1, Loss: 2.7674856185913086


Processing epoch 00:  69%|██████▉   | 3353/4850 [15:23<06:50,  3.64it/s]

Epoch: 1, Loss: 2.530113697052002


Processing epoch 00:  69%|██████▉   | 3354/4850 [15:23<06:55,  3.60it/s]

Epoch: 1, Loss: 2.869652271270752


Processing epoch 00:  69%|██████▉   | 3355/4850 [15:24<06:53,  3.62it/s]

Epoch: 1, Loss: 4.551477909088135


Processing epoch 00:  69%|██████▉   | 3356/4850 [15:24<06:52,  3.62it/s]

Epoch: 1, Loss: 3.508423328399658


Processing epoch 00:  69%|██████▉   | 3357/4850 [15:24<06:53,  3.61it/s]

Epoch: 1, Loss: 2.9518628120422363


Processing epoch 00:  69%|██████▉   | 3358/4850 [15:24<06:51,  3.62it/s]

Epoch: 1, Loss: 3.261294364929199


Processing epoch 00:  69%|██████▉   | 3359/4850 [15:25<06:50,  3.63it/s]

Epoch: 1, Loss: 4.519932270050049


Processing epoch 00:  69%|██████▉   | 3360/4850 [15:25<06:50,  3.63it/s]

Epoch: 1, Loss: 2.935152530670166


Processing epoch 00:  69%|██████▉   | 3361/4850 [15:25<06:48,  3.64it/s]

Epoch: 1, Loss: 3.758408784866333


Processing epoch 00:  69%|██████▉   | 3362/4850 [15:26<06:48,  3.64it/s]

Epoch: 1, Loss: 4.012375354766846


Processing epoch 00:  69%|██████▉   | 3363/4850 [15:26<06:49,  3.63it/s]

Epoch: 1, Loss: 3.084973096847534


Processing epoch 00:  69%|██████▉   | 3364/4850 [15:26<06:50,  3.62it/s]

Epoch: 1, Loss: 3.0013749599456787


Processing epoch 00:  69%|██████▉   | 3365/4850 [15:26<06:47,  3.64it/s]

Epoch: 1, Loss: 2.9745519161224365


Processing epoch 00:  69%|██████▉   | 3366/4850 [15:27<06:46,  3.65it/s]

Epoch: 1, Loss: 3.101442575454712


Processing epoch 00:  69%|██████▉   | 3367/4850 [15:27<06:46,  3.65it/s]

Epoch: 1, Loss: 2.841813564300537


Processing epoch 00:  69%|██████▉   | 3368/4850 [15:27<06:49,  3.62it/s]

Epoch: 1, Loss: 3.104691743850708


Processing epoch 00:  69%|██████▉   | 3369/4850 [15:27<06:59,  3.53it/s]

Epoch: 1, Loss: 2.6471426486968994


Processing epoch 00:  69%|██████▉   | 3370/4850 [15:28<07:12,  3.42it/s]

Epoch: 1, Loss: 3.2995452880859375


Processing epoch 00:  70%|██████▉   | 3371/4850 [15:28<07:09,  3.44it/s]

Epoch: 1, Loss: 3.420240640640259


Processing epoch 00:  70%|██████▉   | 3372/4850 [15:28<07:06,  3.46it/s]

Epoch: 1, Loss: 2.630620002746582


Processing epoch 00:  70%|██████▉   | 3373/4850 [15:29<07:05,  3.47it/s]

Epoch: 1, Loss: 3.7299447059631348


Processing epoch 00:  70%|██████▉   | 3374/4850 [15:29<07:06,  3.46it/s]

Epoch: 1, Loss: 3.140435218811035


Processing epoch 00:  70%|██████▉   | 3375/4850 [15:29<07:07,  3.45it/s]

Epoch: 1, Loss: 3.405439853668213


Processing epoch 00:  70%|██████▉   | 3376/4850 [15:30<07:17,  3.37it/s]

Epoch: 1, Loss: 4.026939392089844


Processing epoch 00:  70%|██████▉   | 3377/4850 [15:30<07:12,  3.41it/s]

Epoch: 1, Loss: 3.5786712169647217


Processing epoch 00:  70%|██████▉   | 3378/4850 [15:30<07:09,  3.43it/s]

Epoch: 1, Loss: 3.1372199058532715


Processing epoch 00:  70%|██████▉   | 3379/4850 [15:30<07:11,  3.41it/s]

Epoch: 1, Loss: 2.8741188049316406


Processing epoch 00:  70%|██████▉   | 3380/4850 [15:31<07:04,  3.46it/s]

Epoch: 1, Loss: 3.638641357421875


Processing epoch 00:  70%|██████▉   | 3381/4850 [15:31<06:57,  3.52it/s]

Epoch: 1, Loss: 3.3977744579315186


Processing epoch 00:  70%|██████▉   | 3382/4850 [15:31<06:53,  3.55it/s]

Epoch: 1, Loss: 2.8604958057403564


Processing epoch 00:  70%|██████▉   | 3383/4850 [15:32<06:50,  3.58it/s]

Epoch: 1, Loss: 2.2872843742370605


Processing epoch 00:  70%|██████▉   | 3384/4850 [15:32<06:48,  3.59it/s]

Epoch: 1, Loss: 3.314457654953003


Processing epoch 00:  70%|██████▉   | 3385/4850 [15:32<06:45,  3.61it/s]

Epoch: 1, Loss: 4.063449859619141


Processing epoch 00:  70%|██████▉   | 3386/4850 [15:32<06:42,  3.64it/s]

Epoch: 1, Loss: 4.168771266937256


Processing epoch 00:  70%|██████▉   | 3387/4850 [15:33<06:44,  3.61it/s]

Epoch: 1, Loss: 3.302506446838379


Processing epoch 00:  70%|██████▉   | 3388/4850 [15:33<06:45,  3.61it/s]

Epoch: 1, Loss: 3.0740904808044434


Processing epoch 00:  70%|██████▉   | 3389/4850 [15:33<06:43,  3.62it/s]

Epoch: 1, Loss: 3.8180224895477295


Processing epoch 00:  70%|██████▉   | 3390/4850 [15:33<06:43,  3.62it/s]

Epoch: 1, Loss: 2.6201462745666504


Processing epoch 00:  70%|██████▉   | 3391/4850 [15:34<06:45,  3.60it/s]

Epoch: 1, Loss: 2.607811689376831


Processing epoch 00:  70%|██████▉   | 3392/4850 [15:34<06:43,  3.62it/s]

Epoch: 1, Loss: 3.2192635536193848


Processing epoch 00:  70%|██████▉   | 3393/4850 [15:34<06:42,  3.62it/s]

Epoch: 1, Loss: 2.775102138519287


Processing epoch 00:  70%|██████▉   | 3394/4850 [15:35<06:40,  3.63it/s]

Epoch: 1, Loss: 3.287945032119751


Processing epoch 00:  70%|███████   | 3395/4850 [15:35<06:40,  3.63it/s]

Epoch: 1, Loss: 3.2438454627990723


Processing epoch 00:  70%|███████   | 3396/4850 [15:35<06:40,  3.63it/s]

Epoch: 1, Loss: 3.664576530456543


Processing epoch 00:  70%|███████   | 3397/4850 [15:35<06:40,  3.63it/s]

Epoch: 1, Loss: 3.654233455657959


Processing epoch 00:  70%|███████   | 3398/4850 [15:36<06:38,  3.65it/s]

Epoch: 1, Loss: 4.953780174255371


Processing epoch 00:  70%|███████   | 3399/4850 [15:36<06:37,  3.65it/s]

Epoch: 1, Loss: 2.5371522903442383


Processing epoch 00:  70%|███████   | 3400/4850 [15:36<06:36,  3.66it/s]

Epoch: 1, Loss: 3.6591339111328125


Processing epoch 00:  70%|███████   | 3401/4850 [15:36<06:36,  3.66it/s]

Epoch: 1, Loss: 2.9360954761505127


Processing epoch 00:  70%|███████   | 3402/4850 [15:37<06:39,  3.63it/s]

Epoch: 1, Loss: 2.9811110496520996


Processing epoch 00:  70%|███████   | 3403/4850 [15:37<06:37,  3.64it/s]

Epoch: 1, Loss: 4.253635883331299


Processing epoch 00:  70%|███████   | 3404/4850 [15:37<06:36,  3.65it/s]

Epoch: 1, Loss: 3.8354883193969727


Processing epoch 00:  70%|███████   | 3405/4850 [15:38<06:38,  3.63it/s]

Epoch: 1, Loss: 3.2404372692108154


Processing epoch 00:  70%|███████   | 3406/4850 [15:38<06:38,  3.62it/s]

Epoch: 1, Loss: 3.1107678413391113


Processing epoch 00:  70%|███████   | 3407/4850 [15:38<06:38,  3.62it/s]

Epoch: 1, Loss: 2.5758328437805176


Processing epoch 00:  70%|███████   | 3408/4850 [15:38<06:39,  3.61it/s]

Epoch: 1, Loss: 4.139542579650879


Processing epoch 00:  70%|███████   | 3409/4850 [15:39<06:37,  3.63it/s]

Epoch: 1, Loss: 3.2700393199920654


Processing epoch 00:  70%|███████   | 3410/4850 [15:39<06:35,  3.64it/s]

Epoch: 1, Loss: 3.2621941566467285


Processing epoch 00:  70%|███████   | 3411/4850 [15:39<06:34,  3.64it/s]

Epoch: 1, Loss: 3.52296781539917


Processing epoch 00:  70%|███████   | 3412/4850 [15:40<06:33,  3.65it/s]

Epoch: 1, Loss: 3.175072193145752


Processing epoch 00:  70%|███████   | 3413/4850 [15:40<06:33,  3.65it/s]

Epoch: 1, Loss: 3.129591941833496


Processing epoch 00:  70%|███████   | 3414/4850 [15:40<06:33,  3.65it/s]

Epoch: 1, Loss: 3.9899487495422363


Processing epoch 00:  70%|███████   | 3415/4850 [15:40<06:32,  3.66it/s]

Epoch: 1, Loss: 3.0846219062805176


Processing epoch 00:  70%|███████   | 3416/4850 [15:41<06:41,  3.57it/s]

Epoch: 1, Loss: 3.375725746154785


Processing epoch 00:  70%|███████   | 3417/4850 [15:41<06:44,  3.54it/s]

Epoch: 1, Loss: 3.3451147079467773


Processing epoch 00:  70%|███████   | 3418/4850 [15:41<06:52,  3.47it/s]

Epoch: 1, Loss: 3.243244171142578


Processing epoch 00:  70%|███████   | 3419/4850 [15:41<06:52,  3.47it/s]

Epoch: 1, Loss: 3.438417434692383


Processing epoch 00:  71%|███████   | 3420/4850 [15:42<06:50,  3.49it/s]

Epoch: 1, Loss: 3.9014155864715576


Processing epoch 00:  71%|███████   | 3421/4850 [15:42<06:47,  3.50it/s]

Epoch: 1, Loss: 3.696730613708496


Processing epoch 00:  71%|███████   | 3422/4850 [15:42<06:46,  3.51it/s]

Epoch: 1, Loss: 3.085932731628418


Processing epoch 00:  71%|███████   | 3423/4850 [15:43<06:52,  3.46it/s]

Epoch: 1, Loss: 3.0114822387695312


Processing epoch 00:  71%|███████   | 3424/4850 [15:43<06:57,  3.41it/s]

Epoch: 1, Loss: 4.199602127075195


Processing epoch 00:  71%|███████   | 3425/4850 [15:43<06:54,  3.44it/s]

Epoch: 1, Loss: 4.485502243041992


Processing epoch 00:  71%|███████   | 3426/4850 [15:44<06:50,  3.47it/s]

Epoch: 1, Loss: 4.081769943237305


Processing epoch 00:  71%|███████   | 3427/4850 [15:44<06:54,  3.43it/s]

Epoch: 1, Loss: 2.8471555709838867


Processing epoch 00:  71%|███████   | 3428/4850 [15:44<06:50,  3.46it/s]

Epoch: 1, Loss: 3.1804542541503906


Processing epoch 00:  71%|███████   | 3429/4850 [15:44<06:44,  3.51it/s]

Epoch: 1, Loss: 3.469846725463867


Processing epoch 00:  71%|███████   | 3430/4850 [15:45<06:40,  3.54it/s]

Epoch: 1, Loss: 3.568908452987671


Processing epoch 00:  71%|███████   | 3431/4850 [15:45<06:37,  3.57it/s]

Epoch: 1, Loss: 3.185558319091797


Processing epoch 00:  71%|███████   | 3432/4850 [15:45<06:35,  3.58it/s]

Epoch: 1, Loss: 3.3324055671691895


Processing epoch 00:  71%|███████   | 3433/4850 [15:45<06:34,  3.59it/s]

Epoch: 1, Loss: 3.227407932281494


Processing epoch 00:  71%|███████   | 3434/4850 [15:46<06:33,  3.60it/s]

Epoch: 1, Loss: 3.741499423980713


Processing epoch 00:  71%|███████   | 3435/4850 [15:46<06:35,  3.58it/s]

Epoch: 1, Loss: 3.7706494331359863


Processing epoch 00:  71%|███████   | 3436/4850 [15:46<06:32,  3.61it/s]

Epoch: 1, Loss: 3.4283528327941895


Processing epoch 00:  71%|███████   | 3437/4850 [15:47<06:30,  3.62it/s]

Epoch: 1, Loss: 3.6854898929595947


Processing epoch 00:  71%|███████   | 3438/4850 [15:47<06:28,  3.64it/s]

Epoch: 1, Loss: 4.0265212059021


Processing epoch 00:  71%|███████   | 3439/4850 [15:47<06:31,  3.61it/s]

Epoch: 1, Loss: 2.7638726234436035


Processing epoch 00:  71%|███████   | 3440/4850 [15:47<06:30,  3.61it/s]

Epoch: 1, Loss: 2.707723617553711


Processing epoch 00:  71%|███████   | 3441/4850 [15:48<06:28,  3.63it/s]

Epoch: 1, Loss: 3.0706043243408203


Processing epoch 00:  71%|███████   | 3442/4850 [15:48<06:29,  3.61it/s]

Epoch: 1, Loss: 2.6436076164245605


Processing epoch 00:  71%|███████   | 3443/4850 [15:48<06:27,  3.63it/s]

Epoch: 1, Loss: 2.7708587646484375


Processing epoch 00:  71%|███████   | 3444/4850 [15:49<06:26,  3.64it/s]

Epoch: 1, Loss: 3.231360912322998


Processing epoch 00:  71%|███████   | 3445/4850 [15:49<06:25,  3.64it/s]

Epoch: 1, Loss: 3.5457587242126465


Processing epoch 00:  71%|███████   | 3446/4850 [15:49<06:25,  3.64it/s]

Epoch: 1, Loss: 3.6722097396850586


Processing epoch 00:  71%|███████   | 3447/4850 [15:49<06:25,  3.64it/s]

Epoch: 1, Loss: 2.895505428314209


Processing epoch 00:  71%|███████   | 3448/4850 [15:50<06:26,  3.63it/s]

Epoch: 1, Loss: 3.298977851867676


Processing epoch 00:  71%|███████   | 3449/4850 [15:50<06:24,  3.64it/s]

Epoch: 1, Loss: 4.784229755401611


Processing epoch 00:  71%|███████   | 3450/4850 [15:50<06:24,  3.64it/s]

Epoch: 1, Loss: 2.6075267791748047


Processing epoch 00:  71%|███████   | 3451/4850 [15:50<06:24,  3.64it/s]

Epoch: 1, Loss: 3.765308380126953


Processing epoch 00:  71%|███████   | 3452/4850 [15:51<06:24,  3.64it/s]

Epoch: 1, Loss: 3.625917434692383


Processing epoch 00:  71%|███████   | 3453/4850 [15:51<06:23,  3.64it/s]

Epoch: 1, Loss: 3.4811558723449707


Processing epoch 00:  71%|███████   | 3454/4850 [15:51<06:26,  3.61it/s]

Epoch: 1, Loss: 2.6856956481933594


Processing epoch 00:  71%|███████   | 3455/4850 [15:52<06:27,  3.60it/s]

Epoch: 1, Loss: 3.3431055545806885


Processing epoch 00:  71%|███████▏  | 3456/4850 [15:52<06:25,  3.62it/s]

Epoch: 1, Loss: 3.1946403980255127


Processing epoch 00:  71%|███████▏  | 3457/4850 [15:52<06:24,  3.63it/s]

Epoch: 1, Loss: 3.2942051887512207


Processing epoch 00:  71%|███████▏  | 3458/4850 [15:52<06:23,  3.63it/s]

Epoch: 1, Loss: 3.177666187286377


Processing epoch 00:  71%|███████▏  | 3459/4850 [15:53<06:23,  3.62it/s]

Epoch: 1, Loss: 2.9741365909576416


Processing epoch 00:  71%|███████▏  | 3460/4850 [15:53<06:23,  3.63it/s]

Epoch: 1, Loss: 3.586984157562256


Processing epoch 00:  71%|███████▏  | 3461/4850 [15:53<06:23,  3.62it/s]

Epoch: 1, Loss: 3.1327004432678223


Processing epoch 00:  71%|███████▏  | 3462/4850 [15:53<06:21,  3.63it/s]

Epoch: 1, Loss: 3.245027542114258


Processing epoch 00:  71%|███████▏  | 3463/4850 [15:54<06:21,  3.64it/s]

Epoch: 1, Loss: 2.9825832843780518


Processing epoch 00:  71%|███████▏  | 3464/4850 [15:54<06:22,  3.62it/s]

Epoch: 1, Loss: 3.209874391555786


Processing epoch 00:  71%|███████▏  | 3465/4850 [15:54<06:29,  3.56it/s]

Epoch: 1, Loss: 3.0914645195007324


Processing epoch 00:  71%|███████▏  | 3466/4850 [15:55<06:33,  3.52it/s]

Epoch: 1, Loss: 3.2659921646118164


Processing epoch 00:  71%|███████▏  | 3467/4850 [15:55<06:37,  3.48it/s]

Epoch: 1, Loss: 3.5073399543762207


Processing epoch 00:  72%|███████▏  | 3468/4850 [15:55<06:36,  3.48it/s]

Epoch: 1, Loss: 2.614335536956787


Processing epoch 00:  72%|███████▏  | 3469/4850 [15:55<06:36,  3.48it/s]

Epoch: 1, Loss: 3.670595407485962


Processing epoch 00:  72%|███████▏  | 3470/4850 [15:56<06:38,  3.46it/s]

Epoch: 1, Loss: 3.169536590576172


Processing epoch 00:  72%|███████▏  | 3471/4850 [15:56<06:42,  3.42it/s]

Epoch: 1, Loss: 3.328246593475342


Processing epoch 00:  72%|███████▏  | 3472/4850 [15:56<06:42,  3.43it/s]

Epoch: 1, Loss: 3.854904890060425


Processing epoch 00:  72%|███████▏  | 3473/4850 [15:57<06:41,  3.43it/s]

Epoch: 1, Loss: 4.252708911895752


Processing epoch 00:  72%|███████▏  | 3474/4850 [15:57<06:44,  3.40it/s]

Epoch: 1, Loss: 2.753005027770996


Processing epoch 00:  72%|███████▏  | 3475/4850 [15:57<06:42,  3.41it/s]

Epoch: 1, Loss: 2.9027490615844727


Processing epoch 00:  72%|███████▏  | 3476/4850 [15:58<06:41,  3.42it/s]

Epoch: 1, Loss: 3.3057925701141357


Processing epoch 00:  72%|███████▏  | 3477/4850 [15:58<06:35,  3.47it/s]

Epoch: 1, Loss: 3.0862231254577637


Processing epoch 00:  72%|███████▏  | 3478/4850 [15:58<06:30,  3.51it/s]

Epoch: 1, Loss: 3.098417282104492


Processing epoch 00:  72%|███████▏  | 3479/4850 [15:58<06:25,  3.55it/s]

Epoch: 1, Loss: 2.8516616821289062


Processing epoch 00:  72%|███████▏  | 3480/4850 [15:59<06:24,  3.57it/s]

Epoch: 1, Loss: 2.9148876667022705


Processing epoch 00:  72%|███████▏  | 3481/4850 [15:59<06:21,  3.59it/s]

Epoch: 1, Loss: 3.4878783226013184


Processing epoch 00:  72%|███████▏  | 3482/4850 [15:59<06:19,  3.60it/s]

Epoch: 1, Loss: 2.853261947631836


Processing epoch 00:  72%|███████▏  | 3483/4850 [15:59<06:18,  3.61it/s]

Epoch: 1, Loss: 4.666727066040039


Processing epoch 00:  72%|███████▏  | 3484/4850 [16:00<06:19,  3.60it/s]

Epoch: 1, Loss: 4.801586151123047


Processing epoch 00:  72%|███████▏  | 3485/4850 [16:00<06:16,  3.62it/s]

Epoch: 1, Loss: 3.0033297538757324


Processing epoch 00:  72%|███████▏  | 3486/4850 [16:00<06:17,  3.61it/s]

Epoch: 1, Loss: 3.3628182411193848


Processing epoch 00:  72%|███████▏  | 3487/4850 [16:01<06:16,  3.62it/s]

Epoch: 1, Loss: 3.072312355041504


Processing epoch 00:  72%|███████▏  | 3488/4850 [16:01<06:14,  3.64it/s]

Epoch: 1, Loss: 4.110471248626709


Processing epoch 00:  72%|███████▏  | 3489/4850 [16:01<06:14,  3.64it/s]

Epoch: 1, Loss: 2.657499313354492


Processing epoch 00:  72%|███████▏  | 3490/4850 [16:01<06:12,  3.65it/s]

Epoch: 1, Loss: 3.7083864212036133


Processing epoch 00:  72%|███████▏  | 3491/4850 [16:02<06:18,  3.59it/s]

Epoch: 1, Loss: 4.071195125579834


Processing epoch 00:  72%|███████▏  | 3492/4850 [16:02<06:15,  3.61it/s]

Epoch: 1, Loss: 3.704521894454956


Processing epoch 00:  72%|███████▏  | 3493/4850 [16:02<06:14,  3.62it/s]

Epoch: 1, Loss: 3.343954086303711


Processing epoch 00:  72%|███████▏  | 3494/4850 [16:02<06:14,  3.62it/s]

Epoch: 1, Loss: 3.2780213356018066


Processing epoch 00:  72%|███████▏  | 3495/4850 [16:03<06:15,  3.61it/s]

Epoch: 1, Loss: 2.590806245803833


Processing epoch 00:  72%|███████▏  | 3496/4850 [16:03<06:15,  3.61it/s]

Epoch: 1, Loss: 3.459622383117676


Processing epoch 00:  72%|███████▏  | 3497/4850 [16:03<06:12,  3.63it/s]

Epoch: 1, Loss: 3.321101665496826


Processing epoch 00:  72%|███████▏  | 3498/4850 [16:04<06:12,  3.63it/s]

Epoch: 1, Loss: 3.4249753952026367


Processing epoch 00:  72%|███████▏  | 3499/4850 [16:04<06:12,  3.63it/s]

Epoch: 1, Loss: 2.8398802280426025


Processing epoch 00:  72%|███████▏  | 3500/4850 [16:04<06:11,  3.63it/s]

Epoch: 1, Loss: 3.066256046295166


Processing epoch 00:  72%|███████▏  | 3501/4850 [16:04<06:12,  3.62it/s]

Epoch: 1, Loss: 3.765566825866699


Processing epoch 00:  72%|███████▏  | 3502/4850 [16:05<06:14,  3.60it/s]

Epoch: 1, Loss: 3.1485750675201416


Processing epoch 00:  72%|███████▏  | 3503/4850 [16:05<06:13,  3.61it/s]

Epoch: 1, Loss: 2.915604591369629


Processing epoch 00:  72%|███████▏  | 3504/4850 [16:05<06:11,  3.63it/s]

Epoch: 1, Loss: 3.402341604232788


Processing epoch 00:  72%|███████▏  | 3505/4850 [16:06<06:10,  3.63it/s]

Epoch: 1, Loss: 3.512152671813965


Processing epoch 00:  72%|███████▏  | 3506/4850 [16:06<06:12,  3.60it/s]

Epoch: 1, Loss: 3.163054943084717


Processing epoch 00:  72%|███████▏  | 3507/4850 [16:06<06:12,  3.60it/s]

Epoch: 1, Loss: 3.545968532562256


Processing epoch 00:  72%|███████▏  | 3508/4850 [16:06<06:12,  3.60it/s]

Epoch: 1, Loss: 2.8765053749084473


Processing epoch 00:  72%|███████▏  | 3509/4850 [16:07<06:09,  3.62it/s]

Epoch: 1, Loss: 3.1878914833068848


Processing epoch 00:  72%|███████▏  | 3510/4850 [16:07<06:09,  3.63it/s]

Epoch: 1, Loss: 4.138533592224121


Processing epoch 00:  72%|███████▏  | 3511/4850 [16:07<06:06,  3.65it/s]

Epoch: 1, Loss: 4.628292083740234


Processing epoch 00:  72%|███████▏  | 3512/4850 [16:07<06:09,  3.63it/s]

Epoch: 1, Loss: 3.5075788497924805


Processing epoch 00:  72%|███████▏  | 3513/4850 [16:08<06:16,  3.55it/s]

Epoch: 1, Loss: 3.13441801071167


Processing epoch 00:  72%|███████▏  | 3514/4850 [16:08<06:18,  3.53it/s]

Epoch: 1, Loss: 3.878070831298828


Processing epoch 00:  72%|███████▏  | 3515/4850 [16:08<06:18,  3.53it/s]

Epoch: 1, Loss: 3.5866470336914062


Processing epoch 00:  72%|███████▏  | 3516/4850 [16:09<06:17,  3.53it/s]

Epoch: 1, Loss: 4.253554821014404


Processing epoch 00:  73%|███████▎  | 3517/4850 [16:09<06:21,  3.50it/s]

Epoch: 1, Loss: 3.7133965492248535


Processing epoch 00:  73%|███████▎  | 3518/4850 [16:09<06:22,  3.48it/s]

Epoch: 1, Loss: 2.946338176727295


Processing epoch 00:  73%|███████▎  | 3519/4850 [16:09<06:25,  3.45it/s]

Epoch: 1, Loss: 3.2127187252044678


Processing epoch 00:  73%|███████▎  | 3520/4850 [16:10<06:32,  3.38it/s]

Epoch: 1, Loss: 2.88215708732605


Processing epoch 00:  73%|███████▎  | 3521/4850 [16:10<06:32,  3.39it/s]

Epoch: 1, Loss: 3.4799439907073975


Processing epoch 00:  73%|███████▎  | 3522/4850 [16:10<06:27,  3.43it/s]

Epoch: 1, Loss: 2.4191553592681885


Processing epoch 00:  73%|███████▎  | 3523/4850 [16:11<06:29,  3.40it/s]

Epoch: 1, Loss: 2.8260860443115234


Processing epoch 00:  73%|███████▎  | 3524/4850 [16:11<06:22,  3.47it/s]

Epoch: 1, Loss: 3.486042022705078


Processing epoch 00:  73%|███████▎  | 3525/4850 [16:11<06:15,  3.52it/s]

Epoch: 1, Loss: 3.595562219619751


Processing epoch 00:  73%|███████▎  | 3526/4850 [16:12<06:12,  3.56it/s]

Epoch: 1, Loss: 3.848905324935913


Processing epoch 00:  73%|███████▎  | 3527/4850 [16:12<06:09,  3.58it/s]

Epoch: 1, Loss: 3.7943742275238037


Processing epoch 00:  73%|███████▎  | 3528/4850 [16:12<06:10,  3.57it/s]

Epoch: 1, Loss: 3.9271769523620605


Processing epoch 00:  73%|███████▎  | 3529/4850 [16:12<06:08,  3.59it/s]

Epoch: 1, Loss: 2.8177013397216797


Processing epoch 00:  73%|███████▎  | 3530/4850 [16:13<06:05,  3.61it/s]

Epoch: 1, Loss: 3.7412447929382324


Processing epoch 00:  73%|███████▎  | 3531/4850 [16:13<06:05,  3.61it/s]

Epoch: 1, Loss: 3.3821942806243896


Processing epoch 00:  73%|███████▎  | 3532/4850 [16:13<06:04,  3.62it/s]

Epoch: 1, Loss: 3.0262722969055176


Processing epoch 00:  73%|███████▎  | 3533/4850 [16:13<06:03,  3.62it/s]

Epoch: 1, Loss: 3.5083541870117188


Processing epoch 00:  73%|███████▎  | 3534/4850 [16:14<06:03,  3.62it/s]

Epoch: 1, Loss: 2.842461109161377


Processing epoch 00:  73%|███████▎  | 3535/4850 [16:14<06:04,  3.60it/s]

Epoch: 1, Loss: 3.041914463043213


Processing epoch 00:  73%|███████▎  | 3536/4850 [16:14<06:04,  3.61it/s]

Epoch: 1, Loss: 2.6942014694213867


Processing epoch 00:  73%|███████▎  | 3537/4850 [16:15<06:02,  3.62it/s]

Epoch: 1, Loss: 3.9592151641845703


Processing epoch 00:  73%|███████▎  | 3538/4850 [16:15<06:02,  3.62it/s]

Epoch: 1, Loss: 3.2454404830932617


Processing epoch 00:  73%|███████▎  | 3539/4850 [16:15<06:01,  3.63it/s]

Epoch: 1, Loss: 3.129678964614868


Processing epoch 00:  73%|███████▎  | 3540/4850 [16:15<05:59,  3.64it/s]

Epoch: 1, Loss: 3.7035274505615234


Processing epoch 00:  73%|███████▎  | 3541/4850 [16:16<05:58,  3.65it/s]

Epoch: 1, Loss: 3.1893067359924316


Processing epoch 00:  73%|███████▎  | 3542/4850 [16:16<05:59,  3.64it/s]

Epoch: 1, Loss: 2.9922618865966797


Processing epoch 00:  73%|███████▎  | 3543/4850 [16:16<06:01,  3.61it/s]

Epoch: 1, Loss: 3.3003993034362793


Processing epoch 00:  73%|███████▎  | 3544/4850 [16:16<06:01,  3.61it/s]

Epoch: 1, Loss: 2.515953540802002


Processing epoch 00:  73%|███████▎  | 3545/4850 [16:17<06:01,  3.61it/s]

Epoch: 1, Loss: 2.827270030975342


Processing epoch 00:  73%|███████▎  | 3546/4850 [16:17<06:00,  3.62it/s]

Epoch: 1, Loss: 3.2973408699035645


Processing epoch 00:  73%|███████▎  | 3547/4850 [16:17<06:00,  3.61it/s]

Epoch: 1, Loss: 3.1188621520996094


Processing epoch 00:  73%|███████▎  | 3548/4850 [16:18<06:00,  3.61it/s]

Epoch: 1, Loss: 2.6363673210144043


Processing epoch 00:  73%|███████▎  | 3549/4850 [16:18<05:59,  3.61it/s]

Epoch: 1, Loss: 3.17262601852417


Processing epoch 00:  73%|███████▎  | 3550/4850 [16:18<05:58,  3.63it/s]

Epoch: 1, Loss: 4.092611789703369


Processing epoch 00:  73%|███████▎  | 3551/4850 [16:18<05:58,  3.62it/s]

Epoch: 1, Loss: 3.475850820541382


Processing epoch 00:  73%|███████▎  | 3552/4850 [16:19<05:57,  3.63it/s]

Epoch: 1, Loss: 3.252194404602051


Processing epoch 00:  73%|███████▎  | 3553/4850 [16:19<05:56,  3.64it/s]

Epoch: 1, Loss: 3.977910280227661


Processing epoch 00:  73%|███████▎  | 3554/4850 [16:19<05:55,  3.64it/s]

Epoch: 1, Loss: 3.045649528503418


Processing epoch 00:  73%|███████▎  | 3555/4850 [16:20<05:55,  3.65it/s]

Epoch: 1, Loss: 3.469132423400879


Processing epoch 00:  73%|███████▎  | 3556/4850 [16:20<05:54,  3.65it/s]

Epoch: 1, Loss: 3.317685127258301


Processing epoch 00:  73%|███████▎  | 3557/4850 [16:20<05:54,  3.65it/s]

Epoch: 1, Loss: 4.781306743621826


Processing epoch 00:  73%|███████▎  | 3558/4850 [16:20<05:55,  3.63it/s]

Epoch: 1, Loss: 2.9583568572998047


Processing epoch 00:  73%|███████▎  | 3559/4850 [16:21<05:55,  3.63it/s]

Epoch: 1, Loss: 2.750840663909912


Processing epoch 00:  73%|███████▎  | 3560/4850 [16:21<05:59,  3.59it/s]

Epoch: 1, Loss: 3.066821813583374


Processing epoch 00:  73%|███████▎  | 3561/4850 [16:21<06:04,  3.53it/s]

Epoch: 1, Loss: 3.0621588230133057


Processing epoch 00:  73%|███████▎  | 3562/4850 [16:21<06:10,  3.48it/s]

Epoch: 1, Loss: 2.7851133346557617


Processing epoch 00:  73%|███████▎  | 3563/4850 [16:22<06:07,  3.50it/s]

Epoch: 1, Loss: 4.263243198394775


Processing epoch 00:  73%|███████▎  | 3564/4850 [16:22<06:07,  3.50it/s]

Epoch: 1, Loss: 3.7572903633117676


Processing epoch 00:  74%|███████▎  | 3565/4850 [16:22<06:05,  3.51it/s]

Epoch: 1, Loss: 3.175462007522583


Processing epoch 00:  74%|███████▎  | 3566/4850 [16:23<06:09,  3.48it/s]

Epoch: 1, Loss: 3.0955893993377686


Processing epoch 00:  74%|███████▎  | 3567/4850 [16:23<06:14,  3.42it/s]

Epoch: 1, Loss: 3.2886531352996826


Processing epoch 00:  74%|███████▎  | 3568/4850 [16:23<06:14,  3.42it/s]

Epoch: 1, Loss: 3.6633548736572266


Processing epoch 00:  74%|███████▎  | 3569/4850 [16:24<06:14,  3.42it/s]

Epoch: 1, Loss: 4.1590800285339355


Processing epoch 00:  74%|███████▎  | 3570/4850 [16:24<06:20,  3.36it/s]

Epoch: 1, Loss: 3.4838786125183105


Processing epoch 00:  74%|███████▎  | 3571/4850 [16:24<06:19,  3.37it/s]

Epoch: 1, Loss: 3.7699880599975586


Processing epoch 00:  74%|███████▎  | 3572/4850 [16:24<06:11,  3.44it/s]

Epoch: 1, Loss: 2.624410629272461


Processing epoch 00:  74%|███████▎  | 3573/4850 [16:25<06:04,  3.50it/s]

Epoch: 1, Loss: 3.610208034515381


Processing epoch 00:  74%|███████▎  | 3574/4850 [16:25<06:00,  3.54it/s]

Epoch: 1, Loss: 2.7640295028686523


Processing epoch 00:  74%|███████▎  | 3575/4850 [16:25<05:56,  3.58it/s]

Epoch: 1, Loss: 4.328113555908203


Processing epoch 00:  74%|███████▎  | 3576/4850 [16:25<05:53,  3.60it/s]

Epoch: 1, Loss: 4.309521198272705


Processing epoch 00:  74%|███████▍  | 3577/4850 [16:26<05:53,  3.60it/s]

Epoch: 1, Loss: 3.7635269165039062


Processing epoch 00:  74%|███████▍  | 3578/4850 [16:26<05:53,  3.60it/s]

Epoch: 1, Loss: 2.8649816513061523


Processing epoch 00:  74%|███████▍  | 3579/4850 [16:26<05:52,  3.61it/s]

Epoch: 1, Loss: 4.0990166664123535


Processing epoch 00:  74%|███████▍  | 3580/4850 [16:27<05:51,  3.61it/s]

Epoch: 1, Loss: 2.8069307804107666


Processing epoch 00:  74%|███████▍  | 3581/4850 [16:27<05:49,  3.63it/s]

Epoch: 1, Loss: 3.9563870429992676


Processing epoch 00:  74%|███████▍  | 3582/4850 [16:27<05:50,  3.62it/s]

Epoch: 1, Loss: 2.779285430908203


Processing epoch 00:  74%|███████▍  | 3583/4850 [16:27<05:49,  3.63it/s]

Epoch: 1, Loss: 3.3427276611328125


Processing epoch 00:  74%|███████▍  | 3584/4850 [16:28<05:48,  3.64it/s]

Epoch: 1, Loss: 3.0751771926879883


Processing epoch 00:  74%|███████▍  | 3585/4850 [16:28<05:46,  3.65it/s]

Epoch: 1, Loss: 3.317147731781006


Processing epoch 00:  74%|███████▍  | 3586/4850 [16:28<05:46,  3.65it/s]

Epoch: 1, Loss: 3.2754411697387695


Processing epoch 00:  74%|███████▍  | 3587/4850 [16:29<05:45,  3.65it/s]

Epoch: 1, Loss: 3.7451460361480713


Processing epoch 00:  74%|███████▍  | 3588/4850 [16:29<05:45,  3.65it/s]

Epoch: 1, Loss: 3.004410743713379


Processing epoch 00:  74%|███████▍  | 3589/4850 [16:29<05:45,  3.65it/s]

Epoch: 1, Loss: 2.8613595962524414


Processing epoch 00:  74%|███████▍  | 3590/4850 [16:29<05:45,  3.65it/s]

Epoch: 1, Loss: 3.643446207046509


Processing epoch 00:  74%|███████▍  | 3591/4850 [16:30<05:50,  3.59it/s]

Epoch: 1, Loss: 2.3045711517333984


Processing epoch 00:  74%|███████▍  | 3592/4850 [16:30<05:50,  3.59it/s]

Epoch: 1, Loss: 2.539771556854248


Processing epoch 00:  74%|███████▍  | 3593/4850 [16:30<05:48,  3.61it/s]

Epoch: 1, Loss: 3.873664617538452


Processing epoch 00:  74%|███████▍  | 3594/4850 [16:30<05:47,  3.61it/s]

Epoch: 1, Loss: 3.079216480255127


Processing epoch 00:  74%|███████▍  | 3595/4850 [16:31<05:47,  3.61it/s]

Epoch: 1, Loss: 3.8140337467193604


Processing epoch 00:  74%|███████▍  | 3596/4850 [16:31<05:45,  3.63it/s]

Epoch: 1, Loss: 4.666491508483887


Processing epoch 00:  74%|███████▍  | 3597/4850 [16:31<05:45,  3.63it/s]

Epoch: 1, Loss: 3.639937162399292


Processing epoch 00:  74%|███████▍  | 3598/4850 [16:32<05:47,  3.60it/s]

Epoch: 1, Loss: 3.7393736839294434


Processing epoch 00:  74%|███████▍  | 3599/4850 [16:32<05:46,  3.62it/s]

Epoch: 1, Loss: 3.6244688034057617


Processing epoch 00:  74%|███████▍  | 3600/4850 [16:32<05:47,  3.60it/s]

Epoch: 1, Loss: 2.9247946739196777


Processing epoch 00:  74%|███████▍  | 3601/4850 [16:32<05:48,  3.58it/s]

Epoch: 1, Loss: 2.6964292526245117


Processing epoch 00:  74%|███████▍  | 3602/4850 [16:33<05:49,  3.57it/s]

Epoch: 1, Loss: 3.338996410369873


Processing epoch 00:  74%|███████▍  | 3603/4850 [16:33<05:47,  3.59it/s]

Epoch: 1, Loss: 2.558896064758301


Processing epoch 00:  74%|███████▍  | 3604/4850 [16:33<05:45,  3.61it/s]

Epoch: 1, Loss: 4.2639031410217285


Processing epoch 00:  74%|███████▍  | 3605/4850 [16:34<05:44,  3.62it/s]

Epoch: 1, Loss: 3.4886739253997803


Processing epoch 00:  74%|███████▍  | 3606/4850 [16:34<05:42,  3.63it/s]

Epoch: 1, Loss: 3.931942939758301


Processing epoch 00:  74%|███████▍  | 3607/4850 [16:34<05:43,  3.61it/s]

Epoch: 1, Loss: 3.1364283561706543


Processing epoch 00:  74%|███████▍  | 3608/4850 [16:34<05:51,  3.54it/s]

Epoch: 1, Loss: 2.934725284576416


Processing epoch 00:  74%|███████▍  | 3609/4850 [16:35<05:53,  3.52it/s]

Epoch: 1, Loss: 2.6491005420684814


Processing epoch 00:  74%|███████▍  | 3610/4850 [16:35<05:56,  3.48it/s]

Epoch: 1, Loss: 4.325397968292236


Processing epoch 00:  74%|███████▍  | 3611/4850 [16:35<06:02,  3.42it/s]

Epoch: 1, Loss: 3.544506072998047


Processing epoch 00:  74%|███████▍  | 3612/4850 [16:36<06:03,  3.41it/s]

Epoch: 1, Loss: 2.854924201965332


Processing epoch 00:  74%|███████▍  | 3613/4850 [16:36<06:03,  3.40it/s]

Epoch: 1, Loss: 3.5264530181884766


Processing epoch 00:  75%|███████▍  | 3614/4850 [16:36<05:57,  3.46it/s]

Epoch: 1, Loss: 4.094285011291504


Processing epoch 00:  75%|███████▍  | 3615/4850 [16:36<05:58,  3.45it/s]

Epoch: 1, Loss: 3.207321882247925


Processing epoch 00:  75%|███████▍  | 3616/4850 [16:37<05:57,  3.45it/s]

Epoch: 1, Loss: 3.6973910331726074


Processing epoch 00:  75%|███████▍  | 3617/4850 [16:37<06:02,  3.40it/s]

Epoch: 1, Loss: 3.484631061553955


Processing epoch 00:  75%|███████▍  | 3618/4850 [16:37<06:12,  3.31it/s]

Epoch: 1, Loss: 3.1027348041534424


Processing epoch 00:  75%|███████▍  | 3619/4850 [16:38<06:02,  3.40it/s]

Epoch: 1, Loss: 3.77207088470459


Processing epoch 00:  75%|███████▍  | 3620/4850 [16:38<05:55,  3.46it/s]

Epoch: 1, Loss: 3.793018341064453


Processing epoch 00:  75%|███████▍  | 3621/4850 [16:38<05:50,  3.51it/s]

Epoch: 1, Loss: 2.99794340133667


Processing epoch 00:  75%|███████▍  | 3622/4850 [16:38<05:57,  3.44it/s]

Epoch: 1, Loss: 2.9499893188476562


Processing epoch 00:  75%|███████▍  | 3623/4850 [16:39<05:56,  3.44it/s]

Epoch: 1, Loss: 4.689306259155273


Processing epoch 00:  75%|███████▍  | 3624/4850 [16:39<05:59,  3.41it/s]

Epoch: 1, Loss: 2.458197593688965


Processing epoch 00:  75%|███████▍  | 3625/4850 [16:39<06:00,  3.40it/s]

Epoch: 1, Loss: 3.8595948219299316


Processing epoch 00:  75%|███████▍  | 3626/4850 [16:40<05:56,  3.44it/s]

Epoch: 1, Loss: 3.375668525695801


Processing epoch 00:  75%|███████▍  | 3627/4850 [16:40<05:52,  3.47it/s]

Epoch: 1, Loss: 2.9501895904541016


Processing epoch 00:  75%|███████▍  | 3628/4850 [16:40<05:59,  3.40it/s]

Epoch: 1, Loss: 3.2599635124206543


Processing epoch 00:  75%|███████▍  | 3629/4850 [16:41<05:58,  3.40it/s]

Epoch: 1, Loss: 3.467898368835449


Processing epoch 00:  75%|███████▍  | 3630/4850 [16:41<06:03,  3.36it/s]

Epoch: 1, Loss: 3.1215755939483643


Processing epoch 00:  75%|███████▍  | 3631/4850 [16:41<06:04,  3.35it/s]

Epoch: 1, Loss: 3.3595714569091797


Processing epoch 00:  75%|███████▍  | 3632/4850 [16:41<06:07,  3.32it/s]

Epoch: 1, Loss: 2.331172227859497


Processing epoch 00:  75%|███████▍  | 3633/4850 [16:42<05:59,  3.39it/s]

Epoch: 1, Loss: 2.793941020965576


Processing epoch 00:  75%|███████▍  | 3634/4850 [16:42<05:51,  3.46it/s]

Epoch: 1, Loss: 3.875868082046509


Processing epoch 00:  75%|███████▍  | 3635/4850 [16:42<05:47,  3.50it/s]

Epoch: 1, Loss: 3.646188497543335


Processing epoch 00:  75%|███████▍  | 3636/4850 [16:43<05:42,  3.55it/s]

Epoch: 1, Loss: 3.2055416107177734


Processing epoch 00:  75%|███████▍  | 3637/4850 [16:43<05:38,  3.58it/s]

Epoch: 1, Loss: 3.379887104034424


Processing epoch 00:  75%|███████▌  | 3638/4850 [16:43<05:37,  3.59it/s]

Epoch: 1, Loss: 3.218351364135742


Processing epoch 00:  75%|███████▌  | 3639/4850 [16:43<05:36,  3.60it/s]

Epoch: 1, Loss: 3.199862003326416


Processing epoch 00:  75%|███████▌  | 3640/4850 [16:44<05:35,  3.61it/s]

Epoch: 1, Loss: 2.994804620742798


Processing epoch 00:  75%|███████▌  | 3641/4850 [16:44<05:33,  3.62it/s]

Epoch: 1, Loss: 3.2997593879699707


Processing epoch 00:  75%|███████▌  | 3642/4850 [16:44<05:36,  3.60it/s]

Epoch: 1, Loss: 4.549575328826904


Processing epoch 00:  75%|███████▌  | 3643/4850 [16:44<05:34,  3.60it/s]

Epoch: 1, Loss: 3.3645002841949463


Processing epoch 00:  75%|███████▌  | 3644/4850 [16:45<05:34,  3.61it/s]

Epoch: 1, Loss: 3.5792789459228516


Processing epoch 00:  75%|███████▌  | 3645/4850 [16:45<05:33,  3.61it/s]

Epoch: 1, Loss: 3.1664912700653076


Processing epoch 00:  75%|███████▌  | 3646/4850 [16:45<05:33,  3.61it/s]

Epoch: 1, Loss: 2.7312521934509277


Processing epoch 00:  75%|███████▌  | 3647/4850 [16:46<05:33,  3.61it/s]

Epoch: 1, Loss: 2.4308743476867676


Processing epoch 00:  75%|███████▌  | 3648/4850 [16:46<05:32,  3.62it/s]

Epoch: 1, Loss: 3.292694091796875


Processing epoch 00:  75%|███████▌  | 3649/4850 [16:46<05:30,  3.63it/s]

Epoch: 1, Loss: 2.7881078720092773


Processing epoch 00:  75%|███████▌  | 3650/4850 [16:46<05:30,  3.63it/s]

Epoch: 1, Loss: 4.038000106811523


Processing epoch 00:  75%|███████▌  | 3651/4850 [16:47<05:30,  3.62it/s]

Epoch: 1, Loss: 2.75728702545166


Processing epoch 00:  75%|███████▌  | 3652/4850 [16:47<05:29,  3.63it/s]

Epoch: 1, Loss: 3.400493621826172


Processing epoch 00:  75%|███████▌  | 3653/4850 [16:47<05:29,  3.63it/s]

Epoch: 1, Loss: 3.058227062225342


Processing epoch 00:  75%|███████▌  | 3654/4850 [16:48<05:34,  3.57it/s]

Epoch: 1, Loss: 3.2063956260681152


Processing epoch 00:  75%|███████▌  | 3655/4850 [16:48<05:41,  3.50it/s]

Epoch: 1, Loss: 3.2090134620666504


Processing epoch 00:  75%|███████▌  | 3656/4850 [16:48<05:45,  3.46it/s]

Epoch: 1, Loss: 3.183427572250366


Processing epoch 00:  75%|███████▌  | 3657/4850 [16:48<05:47,  3.43it/s]

Epoch: 1, Loss: 3.7720022201538086


Processing epoch 00:  75%|███████▌  | 3658/4850 [16:49<05:44,  3.46it/s]

Epoch: 1, Loss: 2.8368124961853027


Processing epoch 00:  75%|███████▌  | 3659/4850 [16:49<05:41,  3.49it/s]

Epoch: 1, Loss: 3.3625130653381348


Processing epoch 00:  75%|███████▌  | 3660/4850 [16:49<05:44,  3.45it/s]

Epoch: 1, Loss: 2.6598029136657715


Processing epoch 00:  75%|███████▌  | 3661/4850 [16:50<05:42,  3.47it/s]

Epoch: 1, Loss: 3.0421814918518066


Processing epoch 00:  76%|███████▌  | 3662/4850 [16:50<05:47,  3.42it/s]

Epoch: 1, Loss: 3.0099239349365234


Processing epoch 00:  76%|███████▌  | 3663/4850 [16:50<05:45,  3.43it/s]

Epoch: 1, Loss: 2.694934368133545


Processing epoch 00:  76%|███████▌  | 3664/4850 [16:50<05:48,  3.40it/s]

Epoch: 1, Loss: 3.6766958236694336


Processing epoch 00:  76%|███████▌  | 3665/4850 [16:51<05:53,  3.35it/s]

Epoch: 1, Loss: 2.689539909362793


Processing epoch 00:  76%|███████▌  | 3666/4850 [16:51<05:44,  3.43it/s]

Epoch: 1, Loss: 2.8891937732696533


Processing epoch 00:  76%|███████▌  | 3667/4850 [16:51<05:39,  3.49it/s]

Epoch: 1, Loss: 3.153351306915283


Processing epoch 00:  76%|███████▌  | 3668/4850 [16:52<05:34,  3.53it/s]

Epoch: 1, Loss: 2.9873263835906982


Processing epoch 00:  76%|███████▌  | 3669/4850 [16:52<05:33,  3.55it/s]

Epoch: 1, Loss: 3.371769428253174


Processing epoch 00:  76%|███████▌  | 3670/4850 [16:52<05:31,  3.56it/s]

Epoch: 1, Loss: 2.8426806926727295


Processing epoch 00:  76%|███████▌  | 3671/4850 [16:52<05:28,  3.59it/s]

Epoch: 1, Loss: 3.6714653968811035


Processing epoch 00:  76%|███████▌  | 3672/4850 [16:53<05:25,  3.62it/s]

Epoch: 1, Loss: 3.162409543991089


Processing epoch 00:  76%|███████▌  | 3673/4850 [16:53<05:25,  3.62it/s]

Epoch: 1, Loss: 3.303016424179077


Processing epoch 00:  76%|███████▌  | 3674/4850 [16:53<05:23,  3.64it/s]

Epoch: 1, Loss: 4.207938194274902


Processing epoch 00:  76%|███████▌  | 3675/4850 [16:53<05:25,  3.62it/s]

Epoch: 1, Loss: 2.3827857971191406


Processing epoch 00:  76%|███████▌  | 3676/4850 [16:54<05:23,  3.63it/s]

Epoch: 1, Loss: 3.790874719619751


Processing epoch 00:  76%|███████▌  | 3677/4850 [16:54<05:23,  3.62it/s]

Epoch: 1, Loss: 2.344318389892578


Processing epoch 00:  76%|███████▌  | 3678/4850 [16:54<05:23,  3.62it/s]

Epoch: 1, Loss: 2.7582263946533203


Processing epoch 00:  76%|███████▌  | 3679/4850 [16:55<05:24,  3.61it/s]

Epoch: 1, Loss: 2.948038101196289


Processing epoch 00:  76%|███████▌  | 3680/4850 [16:55<05:23,  3.62it/s]

Epoch: 1, Loss: 3.1386306285858154


Processing epoch 00:  76%|███████▌  | 3681/4850 [16:55<05:22,  3.62it/s]

Epoch: 1, Loss: 2.548377513885498


Processing epoch 00:  76%|███████▌  | 3682/4850 [16:55<05:25,  3.59it/s]

Epoch: 1, Loss: 4.553764820098877


Processing epoch 00:  76%|███████▌  | 3683/4850 [16:56<05:23,  3.61it/s]

Epoch: 1, Loss: 3.5385537147521973


Processing epoch 00:  76%|███████▌  | 3684/4850 [16:56<05:21,  3.62it/s]

Epoch: 1, Loss: 3.6547605991363525


Processing epoch 00:  76%|███████▌  | 3685/4850 [16:56<05:21,  3.63it/s]

Epoch: 1, Loss: 3.2824740409851074


Processing epoch 00:  76%|███████▌  | 3686/4850 [16:57<05:21,  3.62it/s]

Epoch: 1, Loss: 3.2109179496765137


Processing epoch 00:  76%|███████▌  | 3687/4850 [16:57<05:20,  3.63it/s]

Epoch: 1, Loss: 4.8780364990234375


Processing epoch 00:  76%|███████▌  | 3688/4850 [16:57<05:18,  3.65it/s]

Epoch: 1, Loss: 3.798001289367676


Processing epoch 00:  76%|███████▌  | 3689/4850 [16:57<05:18,  3.64it/s]

Epoch: 1, Loss: 2.7153542041778564


Processing epoch 00:  76%|███████▌  | 3690/4850 [16:58<05:22,  3.60it/s]

Epoch: 1, Loss: 2.7566890716552734


Processing epoch 00:  76%|███████▌  | 3691/4850 [16:58<05:20,  3.62it/s]

Epoch: 1, Loss: 2.5410032272338867


Processing epoch 00:  76%|███████▌  | 3692/4850 [16:58<05:20,  3.62it/s]

Epoch: 1, Loss: 2.8579442501068115


Processing epoch 00:  76%|███████▌  | 3693/4850 [16:58<05:19,  3.62it/s]

Epoch: 1, Loss: 2.501162528991699


Processing epoch 00:  76%|███████▌  | 3694/4850 [16:59<05:19,  3.62it/s]

Epoch: 1, Loss: 2.243732452392578


Processing epoch 00:  76%|███████▌  | 3695/4850 [16:59<05:17,  3.63it/s]

Epoch: 1, Loss: 4.075405597686768


Processing epoch 00:  76%|███████▌  | 3696/4850 [16:59<05:16,  3.65it/s]

Epoch: 1, Loss: 3.2334609031677246


Processing epoch 00:  76%|███████▌  | 3697/4850 [17:00<05:16,  3.64it/s]

Epoch: 1, Loss: 3.4845194816589355


Processing epoch 00:  76%|███████▌  | 3698/4850 [17:00<05:16,  3.64it/s]

Epoch: 1, Loss: 2.6165828704833984


Processing epoch 00:  76%|███████▋  | 3699/4850 [17:00<05:15,  3.64it/s]

Epoch: 1, Loss: 3.9403815269470215


Processing epoch 00:  76%|███████▋  | 3700/4850 [17:00<05:15,  3.65it/s]

Epoch: 1, Loss: 3.6719212532043457


Processing epoch 00:  76%|███████▋  | 3701/4850 [17:01<05:15,  3.64it/s]

Epoch: 1, Loss: 3.0247883796691895


Processing epoch 00:  76%|███████▋  | 3702/4850 [17:01<05:23,  3.55it/s]

Epoch: 1, Loss: 2.472477912902832


Processing epoch 00:  76%|███████▋  | 3703/4850 [17:01<05:27,  3.51it/s]

Epoch: 1, Loss: 3.8160977363586426


Processing epoch 00:  76%|███████▋  | 3704/4850 [17:02<05:31,  3.46it/s]

Epoch: 1, Loss: 3.086127281188965


Processing epoch 00:  76%|███████▋  | 3705/4850 [17:02<05:36,  3.40it/s]

Epoch: 1, Loss: 3.0917749404907227


Processing epoch 00:  76%|███████▋  | 3706/4850 [17:02<05:30,  3.46it/s]

Epoch: 1, Loss: 3.8211679458618164


Processing epoch 00:  76%|███████▋  | 3707/4850 [17:02<05:31,  3.44it/s]

Epoch: 1, Loss: 2.4686427116394043


Processing epoch 00:  76%|███████▋  | 3708/4850 [17:03<05:34,  3.42it/s]

Epoch: 1, Loss: 3.2129569053649902


Processing epoch 00:  76%|███████▋  | 3709/4850 [17:03<05:32,  3.44it/s]

Epoch: 1, Loss: 3.548525810241699


Processing epoch 00:  76%|███████▋  | 3710/4850 [17:03<05:28,  3.47it/s]

Epoch: 1, Loss: 3.165591239929199


Processing epoch 00:  77%|███████▋  | 3711/4850 [17:04<05:33,  3.42it/s]

Epoch: 1, Loss: 2.564131736755371


Processing epoch 00:  77%|███████▋  | 3712/4850 [17:04<05:36,  3.38it/s]

Epoch: 1, Loss: 3.5396971702575684


Processing epoch 00:  77%|███████▋  | 3713/4850 [17:04<05:32,  3.42it/s]

Epoch: 1, Loss: 3.129328966140747


Processing epoch 00:  77%|███████▋  | 3714/4850 [17:04<05:27,  3.47it/s]

Epoch: 1, Loss: 3.4102706909179688


Processing epoch 00:  77%|███████▋  | 3715/4850 [17:05<05:22,  3.52it/s]

Epoch: 1, Loss: 2.616509437561035


Processing epoch 00:  77%|███████▋  | 3716/4850 [17:05<05:19,  3.55it/s]

Epoch: 1, Loss: 3.2967989444732666


Processing epoch 00:  77%|███████▋  | 3717/4850 [17:05<05:17,  3.57it/s]

Epoch: 1, Loss: 2.7855207920074463


Processing epoch 00:  77%|███████▋  | 3718/4850 [17:06<05:15,  3.59it/s]

Epoch: 1, Loss: 3.356978416442871


Processing epoch 00:  77%|███████▋  | 3719/4850 [17:06<05:15,  3.59it/s]

Epoch: 1, Loss: 3.7079930305480957


Processing epoch 00:  77%|███████▋  | 3720/4850 [17:06<05:13,  3.61it/s]

Epoch: 1, Loss: 2.9234378337860107


Processing epoch 00:  77%|███████▋  | 3721/4850 [17:06<05:11,  3.63it/s]

Epoch: 1, Loss: 3.5898327827453613


Processing epoch 00:  77%|███████▋  | 3722/4850 [17:07<05:11,  3.62it/s]

Epoch: 1, Loss: 3.7279067039489746


Processing epoch 00:  77%|███████▋  | 3723/4850 [17:07<05:16,  3.56it/s]

Epoch: 1, Loss: 2.7369484901428223


Processing epoch 00:  77%|███████▋  | 3724/4850 [17:07<05:14,  3.59it/s]

Epoch: 1, Loss: 2.631354808807373


Processing epoch 00:  77%|███████▋  | 3725/4850 [17:08<05:12,  3.60it/s]

Epoch: 1, Loss: 3.2880959510803223


Processing epoch 00:  77%|███████▋  | 3726/4850 [17:08<05:12,  3.60it/s]

Epoch: 1, Loss: 3.158707618713379


Processing epoch 00:  77%|███████▋  | 3727/4850 [17:08<05:11,  3.60it/s]

Epoch: 1, Loss: 3.773138999938965


Processing epoch 00:  77%|███████▋  | 3728/4850 [17:08<05:10,  3.62it/s]

Epoch: 1, Loss: 3.29648494720459


Processing epoch 00:  77%|███████▋  | 3729/4850 [17:09<05:09,  3.62it/s]

Epoch: 1, Loss: 3.589718818664551


Processing epoch 00:  77%|███████▋  | 3730/4850 [17:09<05:09,  3.62it/s]

Epoch: 1, Loss: 3.9176478385925293


Processing epoch 00:  77%|███████▋  | 3731/4850 [17:09<05:09,  3.62it/s]

Epoch: 1, Loss: 2.4433064460754395


Processing epoch 00:  77%|███████▋  | 3732/4850 [17:09<05:09,  3.62it/s]

Epoch: 1, Loss: 2.9167356491088867


Processing epoch 00:  77%|███████▋  | 3733/4850 [17:10<05:08,  3.62it/s]

Epoch: 1, Loss: 3.285539388656616


Processing epoch 00:  77%|███████▋  | 3734/4850 [17:10<05:10,  3.60it/s]

Epoch: 1, Loss: 3.50826358795166


Processing epoch 00:  77%|███████▋  | 3735/4850 [17:10<05:08,  3.61it/s]

Epoch: 1, Loss: 3.9637324810028076


Processing epoch 00:  77%|███████▋  | 3736/4850 [17:11<05:07,  3.62it/s]

Epoch: 1, Loss: 3.0512845516204834


Processing epoch 00:  77%|███████▋  | 3737/4850 [17:11<05:07,  3.62it/s]

Epoch: 1, Loss: 2.7857799530029297


Processing epoch 00:  77%|███████▋  | 3738/4850 [17:11<05:05,  3.64it/s]

Epoch: 1, Loss: 3.5604257583618164


Processing epoch 00:  77%|███████▋  | 3739/4850 [17:11<05:05,  3.63it/s]

Epoch: 1, Loss: 3.2507357597351074


Processing epoch 00:  77%|███████▋  | 3740/4850 [17:12<05:05,  3.63it/s]

Epoch: 1, Loss: 4.213446617126465


Processing epoch 00:  77%|███████▋  | 3741/4850 [17:12<05:05,  3.63it/s]

Epoch: 1, Loss: 2.596935510635376


Processing epoch 00:  77%|███████▋  | 3742/4850 [17:12<05:05,  3.63it/s]

Epoch: 1, Loss: 2.595048189163208


Processing epoch 00:  77%|███████▋  | 3743/4850 [17:12<05:04,  3.63it/s]

Epoch: 1, Loss: 3.5210046768188477


Processing epoch 00:  77%|███████▋  | 3744/4850 [17:13<05:09,  3.57it/s]

Epoch: 1, Loss: 3.148129940032959


Processing epoch 00:  77%|███████▋  | 3745/4850 [17:13<05:07,  3.59it/s]

Epoch: 1, Loss: 2.7821922302246094


Processing epoch 00:  77%|███████▋  | 3746/4850 [17:13<05:07,  3.59it/s]

Epoch: 1, Loss: 2.221531391143799


Processing epoch 00:  77%|███████▋  | 3747/4850 [17:14<05:05,  3.61it/s]

Epoch: 1, Loss: 2.7017576694488525


Processing epoch 00:  77%|███████▋  | 3748/4850 [17:14<05:05,  3.61it/s]

Epoch: 1, Loss: 4.41937255859375


Processing epoch 00:  77%|███████▋  | 3749/4850 [17:14<05:12,  3.52it/s]

Epoch: 1, Loss: 3.3285953998565674


Processing epoch 00:  77%|███████▋  | 3750/4850 [17:14<05:14,  3.49it/s]

Epoch: 1, Loss: 2.763364315032959


Processing epoch 00:  77%|███████▋  | 3751/4850 [17:15<05:14,  3.50it/s]

Epoch: 1, Loss: 3.3538661003112793


Processing epoch 00:  77%|███████▋  | 3752/4850 [17:15<05:16,  3.46it/s]

Epoch: 1, Loss: 3.501431465148926


Processing epoch 00:  77%|███████▋  | 3753/4850 [17:15<05:17,  3.45it/s]

Epoch: 1, Loss: 3.058244228363037


Processing epoch 00:  77%|███████▋  | 3754/4850 [17:16<05:16,  3.46it/s]

Epoch: 1, Loss: 2.9816083908081055


Processing epoch 00:  77%|███████▋  | 3755/4850 [17:16<05:13,  3.49it/s]

Epoch: 1, Loss: 3.99204421043396


Processing epoch 00:  77%|███████▋  | 3756/4850 [17:16<05:18,  3.43it/s]

Epoch: 1, Loss: 4.582956790924072


Processing epoch 00:  77%|███████▋  | 3757/4850 [17:17<05:21,  3.40it/s]

Epoch: 1, Loss: 2.9333443641662598


Processing epoch 00:  77%|███████▋  | 3758/4850 [17:17<05:27,  3.33it/s]

Epoch: 1, Loss: 3.7306971549987793


Processing epoch 00:  78%|███████▊  | 3759/4850 [17:17<05:27,  3.34it/s]

Epoch: 1, Loss: 3.176344871520996


Processing epoch 00:  78%|███████▊  | 3760/4850 [17:17<05:26,  3.34it/s]

Epoch: 1, Loss: 2.822218418121338


Processing epoch 00:  78%|███████▊  | 3761/4850 [17:18<05:18,  3.42it/s]

Epoch: 1, Loss: 3.5319881439208984


Processing epoch 00:  78%|███████▊  | 3762/4850 [17:18<05:11,  3.49it/s]

Epoch: 1, Loss: 2.868863105773926


Processing epoch 00:  78%|███████▊  | 3763/4850 [17:18<05:07,  3.53it/s]

Epoch: 1, Loss: 3.3333237171173096


Processing epoch 00:  78%|███████▊  | 3764/4850 [17:19<05:05,  3.56it/s]

Epoch: 1, Loss: 2.9431633949279785


Processing epoch 00:  78%|███████▊  | 3765/4850 [17:19<05:03,  3.57it/s]

Epoch: 1, Loss: 4.569365501403809


Processing epoch 00:  78%|███████▊  | 3766/4850 [17:19<05:02,  3.59it/s]

Epoch: 1, Loss: 2.626931667327881


Processing epoch 00:  78%|███████▊  | 3767/4850 [17:19<05:01,  3.59it/s]

Epoch: 1, Loss: 3.0971193313598633


Processing epoch 00:  78%|███████▊  | 3768/4850 [17:20<05:00,  3.60it/s]

Epoch: 1, Loss: 2.894019603729248


Processing epoch 00:  78%|███████▊  | 3769/4850 [17:20<04:59,  3.61it/s]

Epoch: 1, Loss: 2.8597798347473145


Processing epoch 00:  78%|███████▊  | 3770/4850 [17:20<04:59,  3.61it/s]

Epoch: 1, Loss: 3.744779586791992


Processing epoch 00:  78%|███████▊  | 3771/4850 [17:20<04:59,  3.60it/s]

Epoch: 1, Loss: 2.380117893218994


Processing epoch 00:  78%|███████▊  | 3772/4850 [17:21<05:00,  3.59it/s]

Epoch: 1, Loss: 2.5522589683532715


Processing epoch 00:  78%|███████▊  | 3773/4850 [17:21<04:58,  3.61it/s]

Epoch: 1, Loss: 2.655083656311035


Processing epoch 00:  78%|███████▊  | 3774/4850 [17:21<04:56,  3.63it/s]

Epoch: 1, Loss: 3.4462623596191406


Processing epoch 00:  78%|███████▊  | 3775/4850 [17:22<04:56,  3.63it/s]

Epoch: 1, Loss: 3.6275153160095215


Processing epoch 00:  78%|███████▊  | 3776/4850 [17:22<04:56,  3.62it/s]

Epoch: 1, Loss: 2.9139983654022217


Processing epoch 00:  78%|███████▊  | 3777/4850 [17:22<04:55,  3.63it/s]

Epoch: 1, Loss: 3.4305078983306885


Processing epoch 00:  78%|███████▊  | 3778/4850 [17:22<04:54,  3.63it/s]

Epoch: 1, Loss: 2.6415467262268066


Processing epoch 00:  78%|███████▊  | 3779/4850 [17:23<04:55,  3.62it/s]

Epoch: 1, Loss: 3.5624375343322754


Processing epoch 00:  78%|███████▊  | 3780/4850 [17:23<04:56,  3.61it/s]

Epoch: 1, Loss: 2.656362533569336


Processing epoch 00:  78%|███████▊  | 3781/4850 [17:23<04:55,  3.61it/s]

Epoch: 1, Loss: 2.5544464588165283


Processing epoch 00:  78%|███████▊  | 3782/4850 [17:24<05:00,  3.55it/s]

Epoch: 1, Loss: 3.461381673812866


Processing epoch 00:  78%|███████▊  | 3783/4850 [17:24<05:02,  3.53it/s]

Epoch: 1, Loss: 2.8334240913391113


Processing epoch 00:  78%|███████▊  | 3784/4850 [17:24<05:00,  3.55it/s]

Epoch: 1, Loss: 2.9929633140563965


Processing epoch 00:  78%|███████▊  | 3785/4850 [17:24<04:57,  3.58it/s]

Epoch: 1, Loss: 3.0619354248046875


Processing epoch 00:  78%|███████▊  | 3786/4850 [17:25<04:55,  3.60it/s]

Epoch: 1, Loss: 3.151327610015869


Processing epoch 00:  78%|███████▊  | 3787/4850 [17:25<04:53,  3.62it/s]

Epoch: 1, Loss: 3.05655574798584


Processing epoch 00:  78%|███████▊  | 3788/4850 [17:25<04:54,  3.61it/s]

Epoch: 1, Loss: 2.656165838241577


Processing epoch 00:  78%|███████▊  | 3789/4850 [17:25<04:53,  3.62it/s]

Epoch: 1, Loss: 3.114841938018799


Processing epoch 00:  78%|███████▊  | 3790/4850 [17:26<04:52,  3.62it/s]

Epoch: 1, Loss: 3.0034308433532715


Processing epoch 00:  78%|███████▊  | 3791/4850 [17:26<04:52,  3.62it/s]

Epoch: 1, Loss: 3.027568817138672


Processing epoch 00:  78%|███████▊  | 3792/4850 [17:26<04:52,  3.62it/s]

Epoch: 1, Loss: 2.6367313861846924


Processing epoch 00:  78%|███████▊  | 3793/4850 [17:27<04:52,  3.61it/s]

Epoch: 1, Loss: 4.274885654449463


Processing epoch 00:  78%|███████▊  | 3794/4850 [17:27<04:52,  3.61it/s]

Epoch: 1, Loss: 2.9789042472839355


Processing epoch 00:  78%|███████▊  | 3795/4850 [17:27<04:50,  3.63it/s]

Epoch: 1, Loss: 4.280026435852051


Processing epoch 00:  78%|███████▊  | 3796/4850 [17:27<04:51,  3.61it/s]

Epoch: 1, Loss: 3.5893030166625977


Processing epoch 00:  78%|███████▊  | 3797/4850 [17:28<04:58,  3.53it/s]

Epoch: 1, Loss: 3.8071699142456055


Processing epoch 00:  78%|███████▊  | 3798/4850 [17:28<04:59,  3.51it/s]

Epoch: 1, Loss: 3.0450754165649414


Processing epoch 00:  78%|███████▊  | 3799/4850 [17:28<05:00,  3.49it/s]

Epoch: 1, Loss: 3.079906940460205


Processing epoch 00:  78%|███████▊  | 3800/4850 [17:29<05:05,  3.44it/s]

Epoch: 1, Loss: 2.850341320037842


Processing epoch 00:  78%|███████▊  | 3801/4850 [17:29<05:05,  3.43it/s]

Epoch: 1, Loss: 2.697639226913452


Processing epoch 00:  78%|███████▊  | 3802/4850 [17:29<05:07,  3.41it/s]

Epoch: 1, Loss: 3.191802978515625


Processing epoch 00:  78%|███████▊  | 3803/4850 [17:29<05:04,  3.44it/s]

Epoch: 1, Loss: 3.9958837032318115


Processing epoch 00:  78%|███████▊  | 3804/4850 [17:30<05:07,  3.40it/s]

Epoch: 1, Loss: 4.133302688598633


Processing epoch 00:  78%|███████▊  | 3805/4850 [17:30<05:08,  3.39it/s]

Epoch: 1, Loss: 2.8164737224578857


Processing epoch 00:  78%|███████▊  | 3806/4850 [17:30<05:04,  3.42it/s]

Epoch: 1, Loss: 3.4419970512390137


Processing epoch 00:  78%|███████▊  | 3807/4850 [17:31<05:05,  3.41it/s]

Epoch: 1, Loss: 2.647134304046631


Processing epoch 00:  79%|███████▊  | 3808/4850 [17:31<05:00,  3.46it/s]

Epoch: 1, Loss: 3.2232789993286133


Processing epoch 00:  79%|███████▊  | 3809/4850 [17:31<04:56,  3.51it/s]

Epoch: 1, Loss: 3.5368542671203613


Processing epoch 00:  79%|███████▊  | 3810/4850 [17:31<04:53,  3.55it/s]

Epoch: 1, Loss: 3.119636058807373


Processing epoch 00:  79%|███████▊  | 3811/4850 [17:32<04:51,  3.56it/s]

Epoch: 1, Loss: 2.580538749694824


Processing epoch 00:  79%|███████▊  | 3812/4850 [17:32<04:50,  3.57it/s]

Epoch: 1, Loss: 3.01041579246521


Processing epoch 00:  79%|███████▊  | 3813/4850 [17:32<04:49,  3.59it/s]

Epoch: 1, Loss: 3.285438060760498


Processing epoch 00:  79%|███████▊  | 3814/4850 [17:33<04:47,  3.60it/s]

Epoch: 1, Loss: 2.9871926307678223


Processing epoch 00:  79%|███████▊  | 3815/4850 [17:33<04:48,  3.59it/s]

Epoch: 1, Loss: 3.2278590202331543


Processing epoch 00:  79%|███████▊  | 3816/4850 [17:33<04:47,  3.59it/s]

Epoch: 1, Loss: 3.3625988960266113


Processing epoch 00:  79%|███████▊  | 3817/4850 [17:33<04:46,  3.60it/s]

Epoch: 1, Loss: 2.5572826862335205


Processing epoch 00:  79%|███████▊  | 3818/4850 [17:34<04:45,  3.61it/s]

Epoch: 1, Loss: 4.270866394042969


Processing epoch 00:  79%|███████▊  | 3819/4850 [17:34<04:45,  3.61it/s]

Epoch: 1, Loss: 2.3093457221984863


Processing epoch 00:  79%|███████▉  | 3820/4850 [17:34<04:44,  3.62it/s]

Epoch: 1, Loss: 3.004502296447754


Processing epoch 00:  79%|███████▉  | 3821/4850 [17:34<04:44,  3.61it/s]

Epoch: 1, Loss: 2.8075385093688965


Processing epoch 00:  79%|███████▉  | 3822/4850 [17:35<04:44,  3.62it/s]

Epoch: 1, Loss: 3.8710618019104004


Processing epoch 00:  79%|███████▉  | 3823/4850 [17:35<04:44,  3.61it/s]

Epoch: 1, Loss: 2.7397351264953613


Processing epoch 00:  79%|███████▉  | 3824/4850 [17:35<04:44,  3.61it/s]

Epoch: 1, Loss: 3.444505453109741


Processing epoch 00:  79%|███████▉  | 3825/4850 [17:36<04:43,  3.62it/s]

Epoch: 1, Loss: 3.271289348602295


Processing epoch 00:  79%|███████▉  | 3826/4850 [17:36<04:42,  3.63it/s]

Epoch: 1, Loss: 3.7015857696533203


Processing epoch 00:  79%|███████▉  | 3827/4850 [17:36<04:43,  3.60it/s]

Epoch: 1, Loss: 2.9754157066345215


Processing epoch 00:  79%|███████▉  | 3828/4850 [17:36<04:43,  3.61it/s]

Epoch: 1, Loss: 3.275249481201172


Processing epoch 00:  79%|███████▉  | 3829/4850 [17:37<04:42,  3.61it/s]

Epoch: 1, Loss: 3.426957130432129


Processing epoch 00:  79%|███████▉  | 3830/4850 [17:37<04:44,  3.59it/s]

Epoch: 1, Loss: 3.806534767150879


Processing epoch 00:  79%|███████▉  | 3831/4850 [17:37<04:43,  3.59it/s]

Epoch: 1, Loss: 2.5416135787963867


Processing epoch 00:  79%|███████▉  | 3832/4850 [17:38<04:44,  3.58it/s]

Epoch: 1, Loss: 3.130422592163086


Processing epoch 00:  79%|███████▉  | 3833/4850 [17:38<04:42,  3.59it/s]

Epoch: 1, Loss: 3.046069860458374


Processing epoch 00:  79%|███████▉  | 3834/4850 [17:38<04:43,  3.58it/s]

Epoch: 1, Loss: 3.2879838943481445


Processing epoch 00:  79%|███████▉  | 3835/4850 [17:38<04:42,  3.60it/s]

Epoch: 1, Loss: 2.8126113414764404


Processing epoch 00:  79%|███████▉  | 3836/4850 [17:39<04:42,  3.59it/s]

Epoch: 1, Loss: 2.7702372074127197


Processing epoch 00:  79%|███████▉  | 3837/4850 [17:39<04:39,  3.63it/s]

Epoch: 1, Loss: 3.8607590198516846


Processing epoch 00:  79%|███████▉  | 3838/4850 [17:39<04:38,  3.63it/s]

Epoch: 1, Loss: 3.006258249282837


Processing epoch 00:  79%|███████▉  | 3839/4850 [17:39<04:38,  3.64it/s]

Epoch: 1, Loss: 3.243562698364258


Processing epoch 00:  79%|███████▉  | 3840/4850 [17:40<04:39,  3.62it/s]

Epoch: 1, Loss: 2.4906063079833984


Processing epoch 00:  79%|███████▉  | 3841/4850 [17:40<04:41,  3.59it/s]

Epoch: 1, Loss: 3.8972742557525635


Processing epoch 00:  79%|███████▉  | 3842/4850 [17:40<04:40,  3.59it/s]

Epoch: 1, Loss: 2.8384957313537598


Processing epoch 00:  79%|███████▉  | 3843/4850 [17:41<04:39,  3.60it/s]

Epoch: 1, Loss: 3.4284939765930176


Processing epoch 00:  79%|███████▉  | 3844/4850 [17:41<04:43,  3.55it/s]

Epoch: 1, Loss: 3.2335808277130127


Processing epoch 00:  79%|███████▉  | 3845/4850 [17:41<04:53,  3.43it/s]

Epoch: 1, Loss: 2.626801013946533


Processing epoch 00:  79%|███████▉  | 3846/4850 [17:41<04:52,  3.43it/s]

Epoch: 1, Loss: 3.5559334754943848


Processing epoch 00:  79%|███████▉  | 3847/4850 [17:42<04:55,  3.40it/s]

Epoch: 1, Loss: 3.1308093070983887


Processing epoch 00:  79%|███████▉  | 3848/4850 [17:42<04:59,  3.34it/s]

Epoch: 1, Loss: 3.7041053771972656


Processing epoch 00:  79%|███████▉  | 3849/4850 [17:42<04:58,  3.36it/s]

Epoch: 1, Loss: 3.73017954826355


Processing epoch 00:  79%|███████▉  | 3850/4850 [17:43<04:59,  3.34it/s]

Epoch: 1, Loss: 3.1261558532714844


Processing epoch 00:  79%|███████▉  | 3851/4850 [17:43<04:58,  3.34it/s]

Epoch: 1, Loss: 3.1977412700653076


Processing epoch 00:  79%|███████▉  | 3852/4850 [17:43<04:55,  3.38it/s]

Epoch: 1, Loss: 2.8078441619873047


Processing epoch 00:  79%|███████▉  | 3853/4850 [17:44<04:55,  3.37it/s]

Epoch: 1, Loss: 3.622485399246216


Processing epoch 00:  79%|███████▉  | 3854/4850 [17:44<04:55,  3.37it/s]

Epoch: 1, Loss: 3.5128493309020996


Processing epoch 00:  79%|███████▉  | 3855/4850 [17:44<04:52,  3.40it/s]

Epoch: 1, Loss: 2.6620922088623047


Processing epoch 00:  80%|███████▉  | 3856/4850 [17:44<04:45,  3.49it/s]

Epoch: 1, Loss: 3.5628581047058105


Processing epoch 00:  80%|███████▉  | 3857/4850 [17:45<04:42,  3.51it/s]

Epoch: 1, Loss: 2.432528257369995


Processing epoch 00:  80%|███████▉  | 3858/4850 [17:45<04:40,  3.54it/s]

Epoch: 1, Loss: 3.3322649002075195


Processing epoch 00:  80%|███████▉  | 3859/4850 [17:45<04:39,  3.54it/s]

Epoch: 1, Loss: 2.742593288421631


Processing epoch 00:  80%|███████▉  | 3860/4850 [17:46<04:36,  3.58it/s]

Epoch: 1, Loss: 2.6159965991973877


Processing epoch 00:  80%|███████▉  | 3861/4850 [17:46<04:35,  3.59it/s]

Epoch: 1, Loss: 2.84421443939209


Processing epoch 00:  80%|███████▉  | 3862/4850 [17:46<04:34,  3.61it/s]

Epoch: 1, Loss: 2.7711329460144043


Processing epoch 00:  80%|███████▉  | 3863/4850 [17:46<04:33,  3.60it/s]

Epoch: 1, Loss: 3.358123779296875


Processing epoch 00:  80%|███████▉  | 3864/4850 [17:47<04:33,  3.60it/s]

Epoch: 1, Loss: 3.5624396800994873


Processing epoch 00:  80%|███████▉  | 3865/4850 [17:47<04:33,  3.61it/s]

Epoch: 1, Loss: 3.311034679412842


Processing epoch 00:  80%|███████▉  | 3866/4850 [17:47<04:33,  3.60it/s]

Epoch: 1, Loss: 2.4640440940856934


Processing epoch 00:  80%|███████▉  | 3867/4850 [17:47<04:30,  3.63it/s]

Epoch: 1, Loss: 3.8276586532592773


Processing epoch 00:  80%|███████▉  | 3868/4850 [17:48<04:30,  3.63it/s]

Epoch: 1, Loss: 3.213930130004883


Processing epoch 00:  80%|███████▉  | 3869/4850 [17:48<04:31,  3.61it/s]

Epoch: 1, Loss: 3.305081367492676


Processing epoch 00:  80%|███████▉  | 3870/4850 [17:48<04:32,  3.59it/s]

Epoch: 1, Loss: 2.8580355644226074


Processing epoch 00:  80%|███████▉  | 3871/4850 [17:49<04:32,  3.59it/s]

Epoch: 1, Loss: 3.346029281616211


Processing epoch 00:  80%|███████▉  | 3872/4850 [17:49<04:31,  3.60it/s]

Epoch: 1, Loss: 3.1230857372283936


Processing epoch 00:  80%|███████▉  | 3873/4850 [17:49<04:30,  3.62it/s]

Epoch: 1, Loss: 3.2270994186401367


Processing epoch 00:  80%|███████▉  | 3874/4850 [17:49<04:30,  3.61it/s]

Epoch: 1, Loss: 3.424069404602051


Processing epoch 00:  80%|███████▉  | 3875/4850 [17:50<04:29,  3.62it/s]

Epoch: 1, Loss: 2.987272262573242


Processing epoch 00:  80%|███████▉  | 3876/4850 [17:50<04:29,  3.62it/s]

Epoch: 1, Loss: 3.4295268058776855


Processing epoch 00:  80%|███████▉  | 3877/4850 [17:50<04:29,  3.62it/s]

Epoch: 1, Loss: 3.6058669090270996


Processing epoch 00:  80%|███████▉  | 3878/4850 [17:51<04:28,  3.62it/s]

Epoch: 1, Loss: 3.657519817352295


Processing epoch 00:  80%|███████▉  | 3879/4850 [17:51<04:29,  3.61it/s]

Epoch: 1, Loss: 3.6362757682800293


Processing epoch 00:  80%|████████  | 3880/4850 [17:51<04:29,  3.60it/s]

Epoch: 1, Loss: 2.9837465286254883


Processing epoch 00:  80%|████████  | 3881/4850 [17:51<04:29,  3.60it/s]

Epoch: 1, Loss: 2.674070358276367


Processing epoch 00:  80%|████████  | 3882/4850 [17:52<04:27,  3.62it/s]

Epoch: 1, Loss: 2.9508438110351562


Processing epoch 00:  80%|████████  | 3883/4850 [17:52<04:27,  3.62it/s]

Epoch: 1, Loss: 4.074748516082764


Processing epoch 00:  80%|████████  | 3884/4850 [17:52<04:26,  3.63it/s]

Epoch: 1, Loss: 3.0604467391967773


Processing epoch 00:  80%|████████  | 3885/4850 [17:52<04:31,  3.55it/s]

Epoch: 1, Loss: 3.8679466247558594


Processing epoch 00:  80%|████████  | 3886/4850 [17:53<04:29,  3.58it/s]

Epoch: 1, Loss: 3.4045376777648926


Processing epoch 00:  80%|████████  | 3887/4850 [17:53<04:28,  3.59it/s]

Epoch: 1, Loss: 3.6917223930358887


Processing epoch 00:  80%|████████  | 3888/4850 [17:53<04:28,  3.59it/s]

Epoch: 1, Loss: 3.286614179611206


Processing epoch 00:  80%|████████  | 3889/4850 [17:54<04:27,  3.59it/s]

Epoch: 1, Loss: 3.667263984680176


Processing epoch 00:  80%|████████  | 3890/4850 [17:54<04:28,  3.58it/s]

Epoch: 1, Loss: 2.919619083404541


Processing epoch 00:  80%|████████  | 3891/4850 [17:54<04:30,  3.54it/s]

Epoch: 1, Loss: 2.765624523162842


Processing epoch 00:  80%|████████  | 3892/4850 [17:54<04:35,  3.48it/s]

Epoch: 1, Loss: 3.055875301361084


Processing epoch 00:  80%|████████  | 3893/4850 [17:55<04:41,  3.39it/s]

Epoch: 1, Loss: 3.604940176010132


Processing epoch 00:  80%|████████  | 3894/4850 [17:55<04:43,  3.37it/s]

Epoch: 1, Loss: 2.780966281890869


Processing epoch 00:  80%|████████  | 3895/4850 [17:55<04:44,  3.36it/s]

Epoch: 1, Loss: 2.7590150833129883


Processing epoch 00:  80%|████████  | 3896/4850 [17:56<04:41,  3.39it/s]

Epoch: 1, Loss: 2.9903714656829834


Processing epoch 00:  80%|████████  | 3897/4850 [17:56<04:42,  3.37it/s]

Epoch: 1, Loss: 2.726161479949951


Processing epoch 00:  80%|████████  | 3898/4850 [17:56<04:43,  3.36it/s]

Epoch: 1, Loss: 3.0156755447387695


Processing epoch 00:  80%|████████  | 3899/4850 [17:57<04:42,  3.37it/s]

Epoch: 1, Loss: 3.060570240020752


Processing epoch 00:  80%|████████  | 3900/4850 [17:57<04:40,  3.39it/s]

Epoch: 1, Loss: 3.163271427154541


Processing epoch 00:  80%|████████  | 3901/4850 [17:57<04:39,  3.40it/s]

Epoch: 1, Loss: 2.8724875450134277


Processing epoch 00:  80%|████████  | 3902/4850 [17:57<04:43,  3.34it/s]

Epoch: 1, Loss: 3.448401927947998


Processing epoch 00:  80%|████████  | 3903/4850 [17:58<04:40,  3.37it/s]

Epoch: 1, Loss: 2.566904067993164


Processing epoch 00:  80%|████████  | 3904/4850 [17:58<04:34,  3.44it/s]

Epoch: 1, Loss: 2.939950466156006


Processing epoch 00:  81%|████████  | 3905/4850 [17:58<04:30,  3.49it/s]

Epoch: 1, Loss: 3.065957546234131


Processing epoch 00:  81%|████████  | 3906/4850 [17:59<04:28,  3.52it/s]

Epoch: 1, Loss: 3.110020160675049


Processing epoch 00:  81%|████████  | 3907/4850 [17:59<04:25,  3.55it/s]

Epoch: 1, Loss: 3.312260627746582


Processing epoch 00:  81%|████████  | 3908/4850 [17:59<04:23,  3.57it/s]

Epoch: 1, Loss: 2.4954872131347656


Processing epoch 00:  81%|████████  | 3909/4850 [17:59<04:21,  3.60it/s]

Epoch: 1, Loss: 3.373623847961426


Processing epoch 00:  81%|████████  | 3910/4850 [18:00<04:20,  3.60it/s]

Epoch: 1, Loss: 3.6566998958587646


Processing epoch 00:  81%|████████  | 3911/4850 [18:00<04:20,  3.61it/s]

Epoch: 1, Loss: 3.305429697036743


Processing epoch 00:  81%|████████  | 3912/4850 [18:00<04:19,  3.61it/s]

Epoch: 1, Loss: 3.062929153442383


Processing epoch 00:  81%|████████  | 3913/4850 [18:01<04:19,  3.61it/s]

Epoch: 1, Loss: 2.8758387565612793


Processing epoch 00:  81%|████████  | 3914/4850 [18:01<04:20,  3.59it/s]

Epoch: 1, Loss: 3.040954828262329


Processing epoch 00:  81%|████████  | 3915/4850 [18:01<04:21,  3.58it/s]

Epoch: 1, Loss: 3.189913272857666


Processing epoch 00:  81%|████████  | 3916/4850 [18:01<04:19,  3.61it/s]

Epoch: 1, Loss: 3.6591029167175293


Processing epoch 00:  81%|████████  | 3917/4850 [18:02<04:21,  3.57it/s]

Epoch: 1, Loss: 3.4934072494506836


Processing epoch 00:  81%|████████  | 3918/4850 [18:02<04:21,  3.57it/s]

Epoch: 1, Loss: 3.29017972946167


Processing epoch 00:  81%|████████  | 3919/4850 [18:02<04:19,  3.58it/s]

Epoch: 1, Loss: 3.1494388580322266


Processing epoch 00:  81%|████████  | 3920/4850 [18:02<04:17,  3.61it/s]

Epoch: 1, Loss: 3.846132278442383


Processing epoch 00:  81%|████████  | 3921/4850 [18:03<04:16,  3.63it/s]

Epoch: 1, Loss: 3.398064613342285


Processing epoch 00:  81%|████████  | 3922/4850 [18:03<04:14,  3.64it/s]

Epoch: 1, Loss: 3.548129081726074


Processing epoch 00:  81%|████████  | 3923/4850 [18:03<04:14,  3.64it/s]

Epoch: 1, Loss: 2.5607080459594727


Processing epoch 00:  81%|████████  | 3924/4850 [18:04<04:14,  3.64it/s]

Epoch: 1, Loss: 3.512413263320923


Processing epoch 00:  81%|████████  | 3925/4850 [18:04<04:16,  3.61it/s]

Epoch: 1, Loss: 2.5939836502075195


Processing epoch 00:  81%|████████  | 3926/4850 [18:04<04:15,  3.61it/s]

Epoch: 1, Loss: 2.855776071548462


Processing epoch 00:  81%|████████  | 3927/4850 [18:04<04:16,  3.60it/s]

Epoch: 1, Loss: 3.6454124450683594


Processing epoch 00:  81%|████████  | 3928/4850 [18:05<04:16,  3.60it/s]

Epoch: 1, Loss: 2.584303379058838


Processing epoch 00:  81%|████████  | 3929/4850 [18:05<04:15,  3.61it/s]

Epoch: 1, Loss: 3.105454444885254


Processing epoch 00:  81%|████████  | 3930/4850 [18:05<04:14,  3.62it/s]

Epoch: 1, Loss: 3.3558945655822754


Processing epoch 00:  81%|████████  | 3931/4850 [18:06<04:14,  3.62it/s]

Epoch: 1, Loss: 3.231489896774292


Processing epoch 00:  81%|████████  | 3932/4850 [18:06<04:14,  3.61it/s]

Epoch: 1, Loss: 2.8092684745788574


Processing epoch 00:  81%|████████  | 3933/4850 [18:06<04:13,  3.61it/s]

Epoch: 1, Loss: 3.7381656169891357


Processing epoch 00:  81%|████████  | 3934/4850 [18:06<04:13,  3.61it/s]

Epoch: 1, Loss: 2.889103889465332


Processing epoch 00:  81%|████████  | 3935/4850 [18:07<04:13,  3.61it/s]

Epoch: 1, Loss: 3.391777515411377


Processing epoch 00:  81%|████████  | 3936/4850 [18:07<04:12,  3.61it/s]

Epoch: 1, Loss: 3.4369332790374756


Processing epoch 00:  81%|████████  | 3937/4850 [18:07<04:12,  3.61it/s]

Epoch: 1, Loss: 3.4380760192871094


Processing epoch 00:  81%|████████  | 3938/4850 [18:07<04:13,  3.60it/s]

Epoch: 1, Loss: 3.5229711532592773


Processing epoch 00:  81%|████████  | 3939/4850 [18:08<04:14,  3.58it/s]

Epoch: 1, Loss: 3.3187990188598633


Processing epoch 00:  81%|████████  | 3940/4850 [18:08<04:20,  3.50it/s]

Epoch: 1, Loss: 3.61702299118042


Processing epoch 00:  81%|████████▏ | 3941/4850 [18:08<04:21,  3.48it/s]

Epoch: 1, Loss: 2.4626972675323486


Processing epoch 00:  81%|████████▏ | 3942/4850 [18:09<04:19,  3.50it/s]

Epoch: 1, Loss: 3.5679001808166504


Processing epoch 00:  81%|████████▏ | 3943/4850 [18:09<04:21,  3.46it/s]

Epoch: 1, Loss: 3.0950350761413574


Processing epoch 00:  81%|████████▏ | 3944/4850 [18:09<04:22,  3.46it/s]

Epoch: 1, Loss: 3.4975199699401855


Processing epoch 00:  81%|████████▏ | 3945/4850 [18:09<04:25,  3.41it/s]

Epoch: 1, Loss: 2.75234317779541


Processing epoch 00:  81%|████████▏ | 3946/4850 [18:10<04:27,  3.37it/s]

Epoch: 1, Loss: 3.420215129852295


Processing epoch 00:  81%|████████▏ | 3947/4850 [18:10<04:30,  3.34it/s]

Epoch: 1, Loss: 3.2329421043395996


Processing epoch 00:  81%|████████▏ | 3948/4850 [18:10<04:26,  3.39it/s]

Epoch: 1, Loss: 3.447519302368164


Processing epoch 00:  81%|████████▏ | 3949/4850 [18:11<04:23,  3.42it/s]

Epoch: 1, Loss: 2.907500743865967


Processing epoch 00:  81%|████████▏ | 3950/4850 [18:11<04:27,  3.36it/s]

Epoch: 1, Loss: 3.870466709136963


Processing epoch 00:  81%|████████▏ | 3951/4850 [18:11<04:21,  3.44it/s]

Epoch: 1, Loss: 3.416072368621826


Processing epoch 00:  81%|████████▏ | 3952/4850 [18:12<04:17,  3.49it/s]

Epoch: 1, Loss: 3.8070449829101562


Processing epoch 00:  82%|████████▏ | 3953/4850 [18:12<04:13,  3.54it/s]

Epoch: 1, Loss: 2.733067274093628


Processing epoch 00:  82%|████████▏ | 3954/4850 [18:12<04:12,  3.55it/s]

Epoch: 1, Loss: 2.8908066749572754


Processing epoch 00:  82%|████████▏ | 3955/4850 [18:12<04:10,  3.58it/s]

Epoch: 1, Loss: 2.8798632621765137


Processing epoch 00:  82%|████████▏ | 3956/4850 [18:13<04:09,  3.59it/s]

Epoch: 1, Loss: 3.133791923522949


Processing epoch 00:  82%|████████▏ | 3957/4850 [18:13<04:07,  3.60it/s]

Epoch: 1, Loss: 3.9148170948028564


Processing epoch 00:  82%|████████▏ | 3958/4850 [18:13<04:10,  3.56it/s]

Epoch: 1, Loss: 3.3632466793060303


Processing epoch 00:  82%|████████▏ | 3959/4850 [18:13<04:08,  3.59it/s]

Epoch: 1, Loss: 2.7032668590545654


Processing epoch 00:  82%|████████▏ | 3960/4850 [18:14<04:07,  3.59it/s]

Epoch: 1, Loss: 2.5556325912475586


Processing epoch 00:  82%|████████▏ | 3961/4850 [18:14<04:07,  3.59it/s]

Epoch: 1, Loss: 3.1184444427490234


Processing epoch 00:  82%|████████▏ | 3962/4850 [18:14<04:06,  3.60it/s]

Epoch: 1, Loss: 2.596831798553467


Processing epoch 00:  82%|████████▏ | 3963/4850 [18:15<04:06,  3.60it/s]

Epoch: 1, Loss: 3.701104164123535


Processing epoch 00:  82%|████████▏ | 3964/4850 [18:15<04:05,  3.61it/s]

Epoch: 1, Loss: 2.9101269245147705


Processing epoch 00:  82%|████████▏ | 3965/4850 [18:15<04:04,  3.63it/s]

Epoch: 1, Loss: 3.164698600769043


Processing epoch 00:  82%|████████▏ | 3966/4850 [18:15<04:03,  3.63it/s]

Epoch: 1, Loss: 2.9953789710998535


Processing epoch 00:  82%|████████▏ | 3967/4850 [18:16<04:01,  3.65it/s]

Epoch: 1, Loss: 4.029207229614258


Processing epoch 00:  82%|████████▏ | 3968/4850 [18:16<04:01,  3.65it/s]

Epoch: 1, Loss: 4.300481796264648


Processing epoch 00:  82%|████████▏ | 3969/4850 [18:16<04:00,  3.66it/s]

Epoch: 1, Loss: 3.45046329498291


Processing epoch 00:  82%|████████▏ | 3970/4850 [18:17<04:01,  3.64it/s]

Epoch: 1, Loss: 3.411266326904297


Processing epoch 00:  82%|████████▏ | 3971/4850 [18:17<04:02,  3.62it/s]

Epoch: 1, Loss: 3.1350903511047363


Processing epoch 00:  82%|████████▏ | 3972/4850 [18:17<04:02,  3.61it/s]

Epoch: 1, Loss: 2.476557970046997


Processing epoch 00:  82%|████████▏ | 3973/4850 [18:17<04:04,  3.59it/s]

Epoch: 1, Loss: 2.4057087898254395


Processing epoch 00:  82%|████████▏ | 3974/4850 [18:18<04:03,  3.60it/s]

Epoch: 1, Loss: 3.308908462524414


Processing epoch 00:  82%|████████▏ | 3975/4850 [18:18<04:03,  3.59it/s]

Epoch: 1, Loss: 2.86263370513916


Processing epoch 00:  82%|████████▏ | 3976/4850 [18:18<04:02,  3.61it/s]

Epoch: 1, Loss: 3.0535764694213867


Processing epoch 00:  82%|████████▏ | 3977/4850 [18:18<04:01,  3.61it/s]

Epoch: 1, Loss: 3.2111191749572754


Processing epoch 00:  82%|████████▏ | 3978/4850 [18:19<04:01,  3.61it/s]

Epoch: 1, Loss: 2.7606706619262695


Processing epoch 00:  82%|████████▏ | 3979/4850 [18:19<04:00,  3.62it/s]

Epoch: 1, Loss: 3.8213562965393066


Processing epoch 00:  82%|████████▏ | 3980/4850 [18:19<04:00,  3.61it/s]

Epoch: 1, Loss: 2.3242111206054688


Processing epoch 00:  82%|████████▏ | 3981/4850 [18:20<04:00,  3.62it/s]

Epoch: 1, Loss: 2.643037796020508


Processing epoch 00:  82%|████████▏ | 3982/4850 [18:20<04:00,  3.61it/s]

Epoch: 1, Loss: 3.229517936706543


Processing epoch 00:  82%|████████▏ | 3983/4850 [18:20<04:00,  3.61it/s]

Epoch: 1, Loss: 3.7774996757507324


Processing epoch 00:  82%|████████▏ | 3984/4850 [18:20<04:01,  3.59it/s]

Epoch: 1, Loss: 4.951140403747559


Processing epoch 00:  82%|████████▏ | 3985/4850 [18:21<04:01,  3.58it/s]

Epoch: 1, Loss: 2.7242307662963867


Processing epoch 00:  82%|████████▏ | 3986/4850 [18:21<03:59,  3.61it/s]

Epoch: 1, Loss: 3.415090560913086


Processing epoch 00:  82%|████████▏ | 3987/4850 [18:21<04:05,  3.51it/s]

Epoch: 1, Loss: 2.90677809715271


Processing epoch 00:  82%|████████▏ | 3988/4850 [18:22<04:09,  3.46it/s]

Epoch: 1, Loss: 3.22236967086792


Processing epoch 00:  82%|████████▏ | 3989/4850 [18:22<04:15,  3.37it/s]

Epoch: 1, Loss: 2.281520366668701


Processing epoch 00:  82%|████████▏ | 3990/4850 [18:22<04:12,  3.41it/s]

Epoch: 1, Loss: 3.597365379333496


Processing epoch 00:  82%|████████▏ | 3991/4850 [18:22<04:09,  3.44it/s]

Epoch: 1, Loss: 2.3753843307495117


Processing epoch 00:  82%|████████▏ | 3992/4850 [18:23<04:11,  3.41it/s]

Epoch: 1, Loss: 2.8019704818725586


Processing epoch 00:  82%|████████▏ | 3993/4850 [18:23<04:12,  3.39it/s]

Epoch: 1, Loss: 3.221406936645508


Processing epoch 00:  82%|████████▏ | 3994/4850 [18:23<04:16,  3.34it/s]

Epoch: 1, Loss: 2.7739622592926025


Processing epoch 00:  82%|████████▏ | 3995/4850 [18:24<04:16,  3.33it/s]

Epoch: 1, Loss: 2.7376861572265625


Processing epoch 00:  82%|████████▏ | 3996/4850 [18:24<04:12,  3.39it/s]

Epoch: 1, Loss: 2.9079930782318115


Processing epoch 00:  82%|████████▏ | 3997/4850 [18:24<04:17,  3.31it/s]

Epoch: 1, Loss: 2.6134157180786133


Processing epoch 00:  82%|████████▏ | 3998/4850 [18:25<04:11,  3.39it/s]

Epoch: 1, Loss: 3.269247531890869


Processing epoch 00:  82%|████████▏ | 3999/4850 [18:25<04:06,  3.46it/s]

Epoch: 1, Loss: 3.1247148513793945


Processing epoch 00:  82%|████████▏ | 4000/4850 [18:25<04:03,  3.49it/s]

Epoch: 1, Loss: 3.182016372680664


Processing epoch 00:  82%|████████▏ | 4001/4850 [18:25<04:01,  3.51it/s]

Epoch: 1, Loss: 3.13625431060791


Processing epoch 00:  83%|████████▎ | 4002/4850 [18:26<04:02,  3.49it/s]

Epoch: 1, Loss: 4.169866561889648


Processing epoch 00:  83%|████████▎ | 4003/4850 [18:26<04:00,  3.52it/s]

Epoch: 1, Loss: 3.2364869117736816


Processing epoch 00:  83%|████████▎ | 4004/4850 [18:26<03:57,  3.56it/s]

Epoch: 1, Loss: 2.450747013092041


Processing epoch 00:  83%|████████▎ | 4005/4850 [18:26<03:55,  3.58it/s]

Epoch: 1, Loss: 3.3086013793945312


Processing epoch 00:  83%|████████▎ | 4006/4850 [18:27<03:55,  3.59it/s]

Epoch: 1, Loss: 3.0253827571868896


Processing epoch 00:  83%|████████▎ | 4007/4850 [18:27<03:58,  3.53it/s]

Epoch: 1, Loss: 3.0056822299957275


Processing epoch 00:  83%|████████▎ | 4008/4850 [18:27<03:56,  3.56it/s]

Epoch: 1, Loss: 3.119241714477539


Processing epoch 00:  83%|████████▎ | 4009/4850 [18:28<03:54,  3.59it/s]

Epoch: 1, Loss: 3.232499599456787


Processing epoch 00:  83%|████████▎ | 4010/4850 [18:28<03:53,  3.60it/s]

Epoch: 1, Loss: 2.721613883972168


Processing epoch 00:  83%|████████▎ | 4011/4850 [18:28<03:53,  3.60it/s]

Epoch: 1, Loss: 3.7072980403900146


Processing epoch 00:  83%|████████▎ | 4012/4850 [18:28<03:53,  3.59it/s]

Epoch: 1, Loss: 2.902193069458008


Processing epoch 00:  83%|████████▎ | 4013/4850 [18:29<03:54,  3.56it/s]

Epoch: 1, Loss: 2.6188762187957764


Processing epoch 00:  83%|████████▎ | 4014/4850 [18:29<03:53,  3.59it/s]

Epoch: 1, Loss: 2.8626294136047363


Processing epoch 00:  83%|████████▎ | 4015/4850 [18:29<03:51,  3.61it/s]

Epoch: 1, Loss: 4.2211480140686035


Processing epoch 00:  83%|████████▎ | 4016/4850 [18:30<03:51,  3.61it/s]

Epoch: 1, Loss: 2.7798473834991455


Processing epoch 00:  83%|████████▎ | 4017/4850 [18:30<03:51,  3.60it/s]

Epoch: 1, Loss: 2.398641586303711


Processing epoch 00:  83%|████████▎ | 4018/4850 [18:30<03:50,  3.61it/s]

Epoch: 1, Loss: 3.0034408569335938


Processing epoch 00:  83%|████████▎ | 4019/4850 [18:30<03:49,  3.62it/s]

Epoch: 1, Loss: 2.911458969116211


Processing epoch 00:  83%|████████▎ | 4020/4850 [18:31<03:50,  3.60it/s]

Epoch: 1, Loss: 3.4833288192749023


Processing epoch 00:  83%|████████▎ | 4021/4850 [18:31<03:49,  3.61it/s]

Epoch: 1, Loss: 3.6419548988342285


Processing epoch 00:  83%|████████▎ | 4022/4850 [18:31<03:49,  3.61it/s]

Epoch: 1, Loss: 2.8046681880950928


Processing epoch 00:  83%|████████▎ | 4023/4850 [18:31<03:49,  3.60it/s]

Epoch: 1, Loss: 2.638861894607544


Processing epoch 00:  83%|████████▎ | 4024/4850 [18:32<03:48,  3.61it/s]

Epoch: 1, Loss: 3.449035167694092


Processing epoch 00:  83%|████████▎ | 4025/4850 [18:32<03:48,  3.61it/s]

Epoch: 1, Loss: 2.861347198486328


Processing epoch 00:  83%|████████▎ | 4026/4850 [18:32<03:49,  3.59it/s]

Epoch: 1, Loss: 2.381469249725342


Processing epoch 00:  83%|████████▎ | 4027/4850 [18:33<03:47,  3.61it/s]

Epoch: 1, Loss: 2.8917407989501953


Processing epoch 00:  83%|████████▎ | 4028/4850 [18:33<03:48,  3.59it/s]

Epoch: 1, Loss: 2.645240306854248


Processing epoch 00:  83%|████████▎ | 4029/4850 [18:33<03:48,  3.59it/s]

Epoch: 1, Loss: 2.5362887382507324


Processing epoch 00:  83%|████████▎ | 4030/4850 [18:33<03:48,  3.58it/s]

Epoch: 1, Loss: 2.725898027420044


Processing epoch 00:  83%|████████▎ | 4031/4850 [18:34<03:49,  3.57it/s]

Epoch: 1, Loss: 3.7904391288757324


Processing epoch 00:  83%|████████▎ | 4032/4850 [18:34<03:48,  3.58it/s]

Epoch: 1, Loss: 2.956310272216797


Processing epoch 00:  83%|████████▎ | 4033/4850 [18:34<03:47,  3.59it/s]

Epoch: 1, Loss: 3.5007643699645996


Processing epoch 00:  83%|████████▎ | 4034/4850 [18:35<03:53,  3.50it/s]

Epoch: 1, Loss: 3.420048236846924


Processing epoch 00:  83%|████████▎ | 4035/4850 [18:35<03:53,  3.49it/s]

Epoch: 1, Loss: 3.1988301277160645


Processing epoch 00:  83%|████████▎ | 4036/4850 [18:35<03:53,  3.49it/s]

Epoch: 1, Loss: 2.9717776775360107


Processing epoch 00:  83%|████████▎ | 4037/4850 [18:35<03:53,  3.49it/s]

Epoch: 1, Loss: 3.1384339332580566


Processing epoch 00:  83%|████████▎ | 4038/4850 [18:36<03:51,  3.50it/s]

Epoch: 1, Loss: 3.1141915321350098


Processing epoch 00:  83%|████████▎ | 4039/4850 [18:36<03:52,  3.49it/s]

Epoch: 1, Loss: 4.771515369415283


Processing epoch 00:  83%|████████▎ | 4040/4850 [18:36<03:55,  3.45it/s]

Epoch: 1, Loss: 3.6892004013061523


Processing epoch 00:  83%|████████▎ | 4041/4850 [18:37<03:56,  3.42it/s]

Epoch: 1, Loss: 2.441713333129883


Processing epoch 00:  83%|████████▎ | 4042/4850 [18:37<03:54,  3.44it/s]

Epoch: 1, Loss: 4.069313049316406


Processing epoch 00:  83%|████████▎ | 4043/4850 [18:37<03:55,  3.43it/s]

Epoch: 1, Loss: 3.570258617401123


Processing epoch 00:  83%|████████▎ | 4044/4850 [18:37<03:57,  3.40it/s]

Epoch: 1, Loss: 2.804635524749756


Processing epoch 00:  83%|████████▎ | 4045/4850 [18:38<03:58,  3.37it/s]

Epoch: 1, Loss: 2.2898783683776855


Processing epoch 00:  83%|████████▎ | 4046/4850 [18:38<03:54,  3.42it/s]

Epoch: 1, Loss: 2.684758186340332


Processing epoch 00:  83%|████████▎ | 4047/4850 [18:38<03:50,  3.48it/s]

Epoch: 1, Loss: 2.762993335723877


Processing epoch 00:  83%|████████▎ | 4048/4850 [18:39<03:48,  3.52it/s]

Epoch: 1, Loss: 2.969187021255493


Processing epoch 00:  83%|████████▎ | 4049/4850 [18:39<03:45,  3.55it/s]

Epoch: 1, Loss: 3.3871400356292725


Processing epoch 00:  84%|████████▎ | 4050/4850 [18:39<03:44,  3.56it/s]

Epoch: 1, Loss: 3.3261513710021973


Processing epoch 00:  84%|████████▎ | 4051/4850 [18:39<03:42,  3.59it/s]

Epoch: 1, Loss: 2.986088275909424


Processing epoch 00:  84%|████████▎ | 4052/4850 [18:40<03:42,  3.58it/s]

Epoch: 1, Loss: 3.097219467163086


Processing epoch 00:  84%|████████▎ | 4053/4850 [18:40<03:41,  3.59it/s]

Epoch: 1, Loss: 3.347418785095215


Processing epoch 00:  84%|████████▎ | 4054/4850 [18:40<03:41,  3.60it/s]

Epoch: 1, Loss: 3.093606948852539


Processing epoch 00:  84%|████████▎ | 4055/4850 [18:41<03:40,  3.61it/s]

Epoch: 1, Loss: 3.5524587631225586


Processing epoch 00:  84%|████████▎ | 4056/4850 [18:41<03:40,  3.60it/s]

Epoch: 1, Loss: 2.386582374572754


Processing epoch 00:  84%|████████▎ | 4057/4850 [18:41<03:38,  3.63it/s]

Epoch: 1, Loss: 3.536041259765625


Processing epoch 00:  84%|████████▎ | 4058/4850 [18:41<03:39,  3.61it/s]

Epoch: 1, Loss: 3.2000861167907715


Processing epoch 00:  84%|████████▎ | 4059/4850 [18:42<03:39,  3.60it/s]

Epoch: 1, Loss: 2.7706093788146973


Processing epoch 00:  84%|████████▎ | 4060/4850 [18:42<03:38,  3.62it/s]

Epoch: 1, Loss: 2.742359161376953


Processing epoch 00:  84%|████████▎ | 4061/4850 [18:42<03:39,  3.59it/s]

Epoch: 1, Loss: 3.5606112480163574


Processing epoch 00:  84%|████████▍ | 4062/4850 [18:42<03:37,  3.62it/s]

Epoch: 1, Loss: 3.500091314315796


Processing epoch 00:  84%|████████▍ | 4063/4850 [18:43<03:37,  3.63it/s]

Epoch: 1, Loss: 3.4652934074401855


Processing epoch 00:  84%|████████▍ | 4064/4850 [18:43<03:37,  3.61it/s]

Epoch: 1, Loss: 3.381326198577881


Processing epoch 00:  84%|████████▍ | 4065/4850 [18:43<03:37,  3.61it/s]

Epoch: 1, Loss: 2.799638509750366


Processing epoch 00:  84%|████████▍ | 4066/4850 [18:44<03:36,  3.61it/s]

Epoch: 1, Loss: 2.888050079345703


Processing epoch 00:  84%|████████▍ | 4067/4850 [18:44<03:36,  3.61it/s]

Epoch: 1, Loss: 2.986499309539795


Processing epoch 00:  84%|████████▍ | 4068/4850 [18:44<03:36,  3.61it/s]

Epoch: 1, Loss: 3.035125970840454


Processing epoch 00:  84%|████████▍ | 4069/4850 [18:44<03:36,  3.61it/s]

Epoch: 1, Loss: 2.7572476863861084


Processing epoch 00:  84%|████████▍ | 4070/4850 [18:45<03:35,  3.61it/s]

Epoch: 1, Loss: 2.8516831398010254


Processing epoch 00:  84%|████████▍ | 4071/4850 [18:45<03:36,  3.59it/s]

Epoch: 1, Loss: 3.2005767822265625


Processing epoch 00:  84%|████████▍ | 4072/4850 [18:45<03:37,  3.57it/s]

Epoch: 1, Loss: 3.42828631401062


Processing epoch 00:  84%|████████▍ | 4073/4850 [18:46<03:37,  3.57it/s]

Epoch: 1, Loss: 2.366591453552246


Processing epoch 00:  84%|████████▍ | 4074/4850 [18:46<03:36,  3.59it/s]

Epoch: 1, Loss: 3.1567914485931396


Processing epoch 00:  84%|████████▍ | 4075/4850 [18:46<03:35,  3.60it/s]

Epoch: 1, Loss: 4.404224395751953


Processing epoch 00:  84%|████████▍ | 4076/4850 [18:46<03:34,  3.60it/s]

Epoch: 1, Loss: 2.991330146789551


Processing epoch 00:  84%|████████▍ | 4077/4850 [18:47<03:34,  3.60it/s]

Epoch: 1, Loss: 2.9193217754364014


Processing epoch 00:  84%|████████▍ | 4078/4850 [18:47<03:33,  3.61it/s]

Epoch: 1, Loss: 2.663750648498535


Processing epoch 00:  84%|████████▍ | 4079/4850 [18:47<03:33,  3.61it/s]

Epoch: 1, Loss: 3.826577663421631


Processing epoch 00:  84%|████████▍ | 4080/4850 [18:47<03:32,  3.62it/s]

Epoch: 1, Loss: 2.8879265785217285


Processing epoch 00:  84%|████████▍ | 4081/4850 [18:48<03:32,  3.61it/s]

Epoch: 1, Loss: 2.967951774597168


Processing epoch 00:  84%|████████▍ | 4082/4850 [18:48<03:38,  3.52it/s]

Epoch: 1, Loss: 3.677962303161621


Processing epoch 00:  84%|████████▍ | 4083/4850 [18:48<03:42,  3.45it/s]

Epoch: 1, Loss: 4.563260555267334


Processing epoch 00:  84%|████████▍ | 4084/4850 [18:49<03:45,  3.40it/s]

Epoch: 1, Loss: 2.679914951324463


Processing epoch 00:  84%|████████▍ | 4085/4850 [18:49<03:43,  3.42it/s]

Epoch: 1, Loss: 3.821871757507324


Processing epoch 00:  84%|████████▍ | 4086/4850 [18:49<03:40,  3.46it/s]

Epoch: 1, Loss: 3.3824963569641113


Processing epoch 00:  84%|████████▍ | 4087/4850 [18:50<03:39,  3.48it/s]

Epoch: 1, Loss: 2.4491991996765137


Processing epoch 00:  84%|████████▍ | 4088/4850 [18:50<03:38,  3.48it/s]

Epoch: 1, Loss: 2.1921234130859375


Processing epoch 00:  84%|████████▍ | 4089/4850 [18:50<03:38,  3.49it/s]

Epoch: 1, Loss: 3.452536106109619


Processing epoch 00:  84%|████████▍ | 4090/4850 [18:50<03:38,  3.47it/s]

Epoch: 1, Loss: 2.8840487003326416


Processing epoch 00:  84%|████████▍ | 4091/4850 [18:51<03:41,  3.42it/s]

Epoch: 1, Loss: 3.164909601211548


Processing epoch 00:  84%|████████▍ | 4092/4850 [18:51<03:45,  3.36it/s]

Epoch: 1, Loss: 3.1166443824768066


Processing epoch 00:  84%|████████▍ | 4093/4850 [18:51<03:47,  3.33it/s]

Epoch: 1, Loss: 3.110053539276123


Processing epoch 00:  84%|████████▍ | 4094/4850 [18:52<03:41,  3.41it/s]

Epoch: 1, Loss: 3.402181625366211


Processing epoch 00:  84%|████████▍ | 4095/4850 [18:52<03:38,  3.46it/s]

Epoch: 1, Loss: 3.2830848693847656


Processing epoch 00:  84%|████████▍ | 4096/4850 [18:52<03:35,  3.49it/s]

Epoch: 1, Loss: 2.794874668121338


Processing epoch 00:  84%|████████▍ | 4097/4850 [18:52<03:33,  3.53it/s]

Epoch: 1, Loss: 2.7687950134277344


Processing epoch 00:  84%|████████▍ | 4098/4850 [18:53<03:32,  3.55it/s]

Epoch: 1, Loss: 3.3248238563537598


Processing epoch 00:  85%|████████▍ | 4099/4850 [18:53<03:30,  3.58it/s]

Epoch: 1, Loss: 4.321780204772949


Processing epoch 00:  85%|████████▍ | 4100/4850 [18:53<03:28,  3.59it/s]

Epoch: 1, Loss: 3.0653326511383057


Processing epoch 00:  85%|████████▍ | 4101/4850 [18:54<03:28,  3.59it/s]

Epoch: 1, Loss: 3.203566551208496


Processing epoch 00:  85%|████████▍ | 4102/4850 [18:54<03:29,  3.58it/s]

Epoch: 1, Loss: 2.064159870147705


Processing epoch 00:  85%|████████▍ | 4103/4850 [18:54<03:28,  3.59it/s]

Epoch: 1, Loss: 2.7690465450286865


Processing epoch 00:  85%|████████▍ | 4104/4850 [18:54<03:27,  3.60it/s]

Epoch: 1, Loss: 2.413848400115967


Processing epoch 00:  85%|████████▍ | 4105/4850 [18:55<03:29,  3.56it/s]

Epoch: 1, Loss: 2.373518466949463


Processing epoch 00:  85%|████████▍ | 4106/4850 [18:55<03:28,  3.57it/s]

Epoch: 1, Loss: 3.124419689178467


Processing epoch 00:  85%|████████▍ | 4107/4850 [18:55<03:27,  3.58it/s]

Epoch: 1, Loss: 3.1485774517059326


Processing epoch 00:  85%|████████▍ | 4108/4850 [18:55<03:27,  3.58it/s]

Epoch: 1, Loss: 2.70546293258667


Processing epoch 00:  85%|████████▍ | 4109/4850 [18:56<03:26,  3.59it/s]

Epoch: 1, Loss: 3.4325060844421387


Processing epoch 00:  85%|████████▍ | 4110/4850 [18:56<03:26,  3.59it/s]

Epoch: 1, Loss: 2.9892773628234863


Processing epoch 00:  85%|████████▍ | 4111/4850 [18:56<03:27,  3.57it/s]

Epoch: 1, Loss: 3.2952322959899902


Processing epoch 00:  85%|████████▍ | 4112/4850 [18:57<03:25,  3.59it/s]

Epoch: 1, Loss: 2.9599006175994873


Processing epoch 00:  85%|████████▍ | 4113/4850 [18:57<03:24,  3.60it/s]

Epoch: 1, Loss: 3.2570905685424805


Processing epoch 00:  85%|████████▍ | 4114/4850 [18:57<03:24,  3.60it/s]

Epoch: 1, Loss: 3.2072558403015137


Processing epoch 00:  85%|████████▍ | 4115/4850 [18:57<03:24,  3.60it/s]

Epoch: 1, Loss: 2.697957754135132


Processing epoch 00:  85%|████████▍ | 4116/4850 [18:58<03:25,  3.56it/s]

Epoch: 1, Loss: 3.028426170349121


Processing epoch 00:  85%|████████▍ | 4117/4850 [18:58<03:24,  3.58it/s]

Epoch: 1, Loss: 3.4674253463745117


Processing epoch 00:  85%|████████▍ | 4118/4850 [18:58<03:23,  3.60it/s]

Epoch: 1, Loss: 2.9773354530334473


Processing epoch 00:  85%|████████▍ | 4119/4850 [18:59<03:23,  3.59it/s]

Epoch: 1, Loss: 2.850849151611328


Processing epoch 00:  85%|████████▍ | 4120/4850 [18:59<03:23,  3.59it/s]

Epoch: 1, Loss: 2.568635940551758


Processing epoch 00:  85%|████████▍ | 4121/4850 [18:59<03:22,  3.60it/s]

Epoch: 1, Loss: 2.996511220932007


Processing epoch 00:  85%|████████▍ | 4122/4850 [18:59<03:21,  3.61it/s]

Epoch: 1, Loss: 4.051056861877441


Processing epoch 00:  85%|████████▌ | 4123/4850 [19:00<03:21,  3.61it/s]

Epoch: 1, Loss: 2.9238314628601074


Processing epoch 00:  85%|████████▌ | 4124/4850 [19:00<03:22,  3.59it/s]

Epoch: 1, Loss: 2.7288105487823486


Processing epoch 00:  85%|████████▌ | 4125/4850 [19:00<03:21,  3.59it/s]

Epoch: 1, Loss: 2.70068097114563


Processing epoch 00:  85%|████████▌ | 4126/4850 [19:00<03:20,  3.60it/s]

Epoch: 1, Loss: 3.617788791656494


Processing epoch 00:  85%|████████▌ | 4127/4850 [19:01<03:22,  3.58it/s]

Epoch: 1, Loss: 2.8829398155212402


Processing epoch 00:  85%|████████▌ | 4128/4850 [19:01<03:22,  3.57it/s]

Epoch: 1, Loss: 2.833139419555664


Processing epoch 00:  85%|████████▌ | 4129/4850 [19:01<03:21,  3.57it/s]

Epoch: 1, Loss: 3.5879952907562256


Processing epoch 00:  85%|████████▌ | 4130/4850 [19:02<03:26,  3.49it/s]

Epoch: 1, Loss: 3.176361083984375


Processing epoch 00:  85%|████████▌ | 4131/4850 [19:02<03:29,  3.43it/s]

Epoch: 1, Loss: 3.0262413024902344


Processing epoch 00:  85%|████████▌ | 4132/4850 [19:02<03:28,  3.44it/s]

Epoch: 1, Loss: 3.323808193206787


Processing epoch 00:  85%|████████▌ | 4133/4850 [19:03<03:29,  3.43it/s]

Epoch: 1, Loss: 3.5362167358398438


Processing epoch 00:  85%|████████▌ | 4134/4850 [19:03<03:28,  3.43it/s]

Epoch: 1, Loss: 2.8239431381225586


Processing epoch 00:  85%|████████▌ | 4135/4850 [19:03<03:27,  3.44it/s]

Epoch: 1, Loss: 2.4282138347625732


Processing epoch 00:  85%|████████▌ | 4136/4850 [19:03<03:30,  3.39it/s]

Epoch: 1, Loss: 3.0375733375549316


Processing epoch 00:  85%|████████▌ | 4137/4850 [19:04<03:31,  3.37it/s]

Epoch: 1, Loss: 3.0057220458984375


Processing epoch 00:  85%|████████▌ | 4138/4850 [19:04<03:34,  3.32it/s]

Epoch: 1, Loss: 3.1042962074279785


Processing epoch 00:  85%|████████▌ | 4139/4850 [19:04<03:29,  3.40it/s]

Epoch: 1, Loss: 3.215280771255493


Processing epoch 00:  85%|████████▌ | 4140/4850 [19:05<03:30,  3.37it/s]

Epoch: 1, Loss: 2.4480538368225098


Processing epoch 00:  85%|████████▌ | 4141/4850 [19:05<03:31,  3.35it/s]

Epoch: 1, Loss: 2.9124646186828613


Processing epoch 00:  85%|████████▌ | 4142/4850 [19:05<03:26,  3.43it/s]

Epoch: 1, Loss: 3.629746437072754


Processing epoch 00:  85%|████████▌ | 4143/4850 [19:05<03:23,  3.48it/s]

Epoch: 1, Loss: 3.7590701580047607


Processing epoch 00:  85%|████████▌ | 4144/4850 [19:06<03:20,  3.52it/s]

Epoch: 1, Loss: 3.901822566986084


Processing epoch 00:  85%|████████▌ | 4145/4850 [19:06<03:20,  3.51it/s]

Epoch: 1, Loss: 2.7407279014587402


Processing epoch 00:  85%|████████▌ | 4146/4850 [19:06<03:18,  3.54it/s]

Epoch: 1, Loss: 2.5299530029296875


Processing epoch 00:  86%|████████▌ | 4147/4850 [19:07<03:20,  3.51it/s]

Epoch: 1, Loss: 3.4136433601379395


Processing epoch 00:  86%|████████▌ | 4148/4850 [19:07<03:18,  3.55it/s]

Epoch: 1, Loss: 2.499622344970703


Processing epoch 00:  86%|████████▌ | 4149/4850 [19:07<03:16,  3.57it/s]

Epoch: 1, Loss: 3.1922049522399902


Processing epoch 00:  86%|████████▌ | 4150/4850 [19:07<03:15,  3.58it/s]

Epoch: 1, Loss: 2.598569393157959


Processing epoch 00:  86%|████████▌ | 4151/4850 [19:08<03:15,  3.58it/s]

Epoch: 1, Loss: 3.6317224502563477


Processing epoch 00:  86%|████████▌ | 4152/4850 [19:08<03:13,  3.60it/s]

Epoch: 1, Loss: 4.489069938659668


Processing epoch 00:  86%|████████▌ | 4153/4850 [19:08<03:13,  3.60it/s]

Epoch: 1, Loss: 3.45939302444458


Processing epoch 00:  86%|████████▌ | 4154/4850 [19:09<03:13,  3.61it/s]

Epoch: 1, Loss: 2.530247688293457


Processing epoch 00:  86%|████████▌ | 4155/4850 [19:09<03:12,  3.61it/s]

Epoch: 1, Loss: 2.596924304962158


Processing epoch 00:  86%|████████▌ | 4156/4850 [19:09<03:11,  3.61it/s]

Epoch: 1, Loss: 3.04274582862854


Processing epoch 00:  86%|████████▌ | 4157/4850 [19:09<03:11,  3.62it/s]

Epoch: 1, Loss: 2.9678258895874023


Processing epoch 00:  86%|████████▌ | 4158/4850 [19:10<03:11,  3.61it/s]

Epoch: 1, Loss: 2.4446654319763184


Processing epoch 00:  86%|████████▌ | 4159/4850 [19:10<03:11,  3.61it/s]

Epoch: 1, Loss: 2.721531391143799


Processing epoch 00:  86%|████████▌ | 4160/4850 [19:10<03:11,  3.61it/s]

Epoch: 1, Loss: 3.100557804107666


Processing epoch 00:  86%|████████▌ | 4161/4850 [19:10<03:10,  3.62it/s]

Epoch: 1, Loss: 3.1752734184265137


Processing epoch 00:  86%|████████▌ | 4162/4850 [19:11<03:10,  3.62it/s]

Epoch: 1, Loss: 3.0893850326538086


Processing epoch 00:  86%|████████▌ | 4163/4850 [19:11<03:11,  3.59it/s]

Epoch: 1, Loss: 3.175969362258911


Processing epoch 00:  86%|████████▌ | 4164/4850 [19:11<03:11,  3.59it/s]

Epoch: 1, Loss: 3.066433906555176


Processing epoch 00:  86%|████████▌ | 4165/4850 [19:12<03:10,  3.60it/s]

Epoch: 1, Loss: 3.119932174682617


Processing epoch 00:  86%|████████▌ | 4166/4850 [19:12<03:09,  3.62it/s]

Epoch: 1, Loss: 4.460439682006836


Processing epoch 00:  86%|████████▌ | 4167/4850 [19:12<03:08,  3.62it/s]

Epoch: 1, Loss: 3.2643356323242188


Processing epoch 00:  86%|████████▌ | 4168/4850 [19:12<03:08,  3.62it/s]

Epoch: 1, Loss: 3.2870922088623047


Processing epoch 00:  86%|████████▌ | 4169/4850 [19:13<03:08,  3.62it/s]

Epoch: 1, Loss: 3.309535503387451


Processing epoch 00:  86%|████████▌ | 4170/4850 [19:13<03:08,  3.61it/s]

Epoch: 1, Loss: 3.4673027992248535


Processing epoch 00:  86%|████████▌ | 4171/4850 [19:13<03:10,  3.56it/s]

Epoch: 1, Loss: 2.770631790161133


Processing epoch 00:  86%|████████▌ | 4172/4850 [19:14<03:09,  3.58it/s]

Epoch: 1, Loss: 2.648789882659912


Processing epoch 00:  86%|████████▌ | 4173/4850 [19:14<03:08,  3.59it/s]

Epoch: 1, Loss: 3.083944320678711


Processing epoch 00:  86%|████████▌ | 4174/4850 [19:14<03:07,  3.61it/s]

Epoch: 1, Loss: 3.94439697265625


Processing epoch 00:  86%|████████▌ | 4175/4850 [19:14<03:07,  3.61it/s]

Epoch: 1, Loss: 3.399782180786133


Processing epoch 00:  86%|████████▌ | 4176/4850 [19:15<03:06,  3.61it/s]

Epoch: 1, Loss: 3.009646415710449


Processing epoch 00:  86%|████████▌ | 4177/4850 [19:15<03:06,  3.60it/s]

Epoch: 1, Loss: 2.5196566581726074


Processing epoch 00:  86%|████████▌ | 4178/4850 [19:15<03:09,  3.54it/s]

Epoch: 1, Loss: 3.1084842681884766


Processing epoch 00:  86%|████████▌ | 4179/4850 [19:16<03:12,  3.48it/s]

Epoch: 1, Loss: 2.8752994537353516


Processing epoch 00:  86%|████████▌ | 4180/4850 [19:16<03:13,  3.46it/s]

Epoch: 1, Loss: 2.827653169631958


Processing epoch 00:  86%|████████▌ | 4181/4850 [19:16<03:15,  3.42it/s]

Epoch: 1, Loss: 3.4972786903381348


Processing epoch 00:  86%|████████▌ | 4182/4850 [19:16<03:18,  3.37it/s]

Epoch: 1, Loss: 3.001911163330078


Processing epoch 00:  86%|████████▌ | 4183/4850 [19:17<03:18,  3.37it/s]

Epoch: 1, Loss: 2.5132369995117188


Processing epoch 00:  86%|████████▋ | 4184/4850 [19:17<03:17,  3.38it/s]

Epoch: 1, Loss: 3.4460134506225586


Processing epoch 00:  86%|████████▋ | 4185/4850 [19:17<03:16,  3.39it/s]

Epoch: 1, Loss: 3.560467481613159


Processing epoch 00:  86%|████████▋ | 4186/4850 [19:18<03:14,  3.41it/s]

Epoch: 1, Loss: 3.498046875


Processing epoch 00:  86%|████████▋ | 4187/4850 [19:18<03:17,  3.36it/s]

Epoch: 1, Loss: 2.2755565643310547


Processing epoch 00:  86%|████████▋ | 4188/4850 [19:18<03:17,  3.35it/s]

Epoch: 1, Loss: 3.1680397987365723


Processing epoch 00:  86%|████████▋ | 4189/4850 [19:18<03:13,  3.42it/s]

Epoch: 1, Loss: 3.0441455841064453


Processing epoch 00:  86%|████████▋ | 4190/4850 [19:19<03:09,  3.48it/s]

Epoch: 1, Loss: 3.17067289352417


Processing epoch 00:  86%|████████▋ | 4191/4850 [19:19<03:06,  3.53it/s]

Epoch: 1, Loss: 3.102757453918457


Processing epoch 00:  86%|████████▋ | 4192/4850 [19:19<03:05,  3.55it/s]

Epoch: 1, Loss: 3.163402795791626


Processing epoch 00:  86%|████████▋ | 4193/4850 [19:20<03:04,  3.56it/s]

Epoch: 1, Loss: 2.7289726734161377


Processing epoch 00:  86%|████████▋ | 4194/4850 [19:20<03:03,  3.57it/s]

Epoch: 1, Loss: 2.620244264602661


Processing epoch 00:  86%|████████▋ | 4195/4850 [19:20<03:02,  3.59it/s]

Epoch: 1, Loss: 2.413951873779297


Processing epoch 00:  87%|████████▋ | 4196/4850 [19:20<03:02,  3.59it/s]

Epoch: 1, Loss: 2.950261116027832


Processing epoch 00:  87%|████████▋ | 4197/4850 [19:21<03:01,  3.60it/s]

Epoch: 1, Loss: 2.8806185722351074


Processing epoch 00:  87%|████████▋ | 4198/4850 [19:21<02:59,  3.62it/s]

Epoch: 1, Loss: 4.102831840515137


Processing epoch 00:  87%|████████▋ | 4199/4850 [19:21<02:59,  3.62it/s]

Epoch: 1, Loss: 3.5176851749420166


Processing epoch 00:  87%|████████▋ | 4200/4850 [19:22<03:02,  3.57it/s]

Epoch: 1, Loss: 2.9959583282470703


Processing epoch 00:  87%|████████▋ | 4201/4850 [19:22<03:01,  3.58it/s]

Epoch: 1, Loss: 3.2101447582244873


Processing epoch 00:  87%|████████▋ | 4202/4850 [19:22<03:00,  3.58it/s]

Epoch: 1, Loss: 2.733905792236328


Processing epoch 00:  87%|████████▋ | 4203/4850 [19:22<03:00,  3.58it/s]

Epoch: 1, Loss: 2.7553091049194336


Processing epoch 00:  87%|████████▋ | 4204/4850 [19:23<02:59,  3.59it/s]

Epoch: 1, Loss: 2.707148551940918


Processing epoch 00:  87%|████████▋ | 4205/4850 [19:23<02:59,  3.60it/s]

Epoch: 1, Loss: 2.314175605773926


Processing epoch 00:  87%|████████▋ | 4206/4850 [19:23<02:58,  3.61it/s]

Epoch: 1, Loss: 3.6371588706970215


Processing epoch 00:  87%|████████▋ | 4207/4850 [19:23<02:58,  3.61it/s]

Epoch: 1, Loss: 2.811244487762451


Processing epoch 00:  87%|████████▋ | 4208/4850 [19:24<02:59,  3.59it/s]

Epoch: 1, Loss: 3.1295042037963867


Processing epoch 00:  87%|████████▋ | 4209/4850 [19:24<02:59,  3.58it/s]

Epoch: 1, Loss: 2.991347312927246


Processing epoch 00:  87%|████████▋ | 4210/4850 [19:24<02:58,  3.59it/s]

Epoch: 1, Loss: 2.404301166534424


Processing epoch 00:  87%|████████▋ | 4211/4850 [19:25<02:59,  3.56it/s]

Epoch: 1, Loss: 3.9464645385742188


Processing epoch 00:  87%|████████▋ | 4212/4850 [19:25<02:58,  3.58it/s]

Epoch: 1, Loss: 2.7829833030700684


Processing epoch 00:  87%|████████▋ | 4213/4850 [19:25<02:56,  3.61it/s]

Epoch: 1, Loss: 3.203099250793457


Processing epoch 00:  87%|████████▋ | 4214/4850 [19:25<02:55,  3.62it/s]

Epoch: 1, Loss: 3.17859148979187


Processing epoch 00:  87%|████████▋ | 4215/4850 [19:26<02:54,  3.63it/s]

Epoch: 1, Loss: 3.975277900695801


Processing epoch 00:  87%|████████▋ | 4216/4850 [19:26<02:54,  3.63it/s]

Epoch: 1, Loss: 2.875858783721924


Processing epoch 00:  87%|████████▋ | 4217/4850 [19:26<02:54,  3.63it/s]

Epoch: 1, Loss: 2.9129042625427246


Processing epoch 00:  87%|████████▋ | 4218/4850 [19:27<02:54,  3.63it/s]

Epoch: 1, Loss: 2.516411304473877


Processing epoch 00:  87%|████████▋ | 4219/4850 [19:27<02:53,  3.63it/s]

Epoch: 1, Loss: 4.194411754608154


Processing epoch 00:  87%|████████▋ | 4220/4850 [19:27<02:54,  3.60it/s]

Epoch: 1, Loss: 2.6937193870544434


Processing epoch 00:  87%|████████▋ | 4221/4850 [19:27<02:55,  3.59it/s]

Epoch: 1, Loss: 3.578517436981201


Processing epoch 00:  87%|████████▋ | 4222/4850 [19:28<02:55,  3.58it/s]

Epoch: 1, Loss: 2.770297050476074


Processing epoch 00:  87%|████████▋ | 4223/4850 [19:28<02:55,  3.56it/s]

Epoch: 1, Loss: 3.4997353553771973


Processing epoch 00:  87%|████████▋ | 4224/4850 [19:28<02:55,  3.58it/s]

Epoch: 1, Loss: 2.7910361289978027


Processing epoch 00:  87%|████████▋ | 4225/4850 [19:28<02:58,  3.50it/s]

Epoch: 1, Loss: 2.1815128326416016


Processing epoch 00:  87%|████████▋ | 4226/4850 [19:29<02:59,  3.47it/s]

Epoch: 1, Loss: 3.3885293006896973


Processing epoch 00:  87%|████████▋ | 4227/4850 [19:29<03:00,  3.46it/s]

Epoch: 1, Loss: 3.0694167613983154


Processing epoch 00:  87%|████████▋ | 4228/4850 [19:29<03:02,  3.41it/s]

Epoch: 1, Loss: 2.9479873180389404


Processing epoch 00:  87%|████████▋ | 4229/4850 [19:30<03:03,  3.39it/s]

Epoch: 1, Loss: 2.353454113006592


Processing epoch 00:  87%|████████▋ | 4230/4850 [19:30<03:01,  3.41it/s]

Epoch: 1, Loss: 3.6237993240356445


Processing epoch 00:  87%|████████▋ | 4231/4850 [19:30<03:02,  3.39it/s]

Epoch: 1, Loss: 5.115443229675293


Processing epoch 00:  87%|████████▋ | 4232/4850 [19:31<03:04,  3.35it/s]

Epoch: 1, Loss: 2.6072731018066406


Processing epoch 00:  87%|████████▋ | 4233/4850 [19:31<03:05,  3.33it/s]

Epoch: 1, Loss: 2.700467586517334


Processing epoch 00:  87%|████████▋ | 4234/4850 [19:31<03:03,  3.35it/s]

Epoch: 1, Loss: 2.2966339588165283


Processing epoch 00:  87%|████████▋ | 4235/4850 [19:31<03:01,  3.39it/s]

Epoch: 1, Loss: 3.1105093955993652


Processing epoch 00:  87%|████████▋ | 4236/4850 [19:32<03:01,  3.38it/s]

Epoch: 1, Loss: 3.0250802040100098


Processing epoch 00:  87%|████████▋ | 4237/4850 [19:32<02:58,  3.44it/s]

Epoch: 1, Loss: 3.172466516494751


Processing epoch 00:  87%|████████▋ | 4238/4850 [19:32<02:55,  3.49it/s]

Epoch: 1, Loss: 2.902634620666504


Processing epoch 00:  87%|████████▋ | 4239/4850 [19:33<02:53,  3.52it/s]

Epoch: 1, Loss: 3.2124152183532715


Processing epoch 00:  87%|████████▋ | 4240/4850 [19:33<02:52,  3.53it/s]

Epoch: 1, Loss: 3.098456382751465


Processing epoch 00:  87%|████████▋ | 4241/4850 [19:33<02:51,  3.55it/s]

Epoch: 1, Loss: 3.4226646423339844


Processing epoch 00:  87%|████████▋ | 4242/4850 [19:33<02:50,  3.56it/s]

Epoch: 1, Loss: 2.9567670822143555


Processing epoch 00:  87%|████████▋ | 4243/4850 [19:34<02:49,  3.58it/s]

Epoch: 1, Loss: 2.6781322956085205


Processing epoch 00:  88%|████████▊ | 4244/4850 [19:34<02:50,  3.55it/s]

Epoch: 1, Loss: 3.2046046257019043


Processing epoch 00:  88%|████████▊ | 4245/4850 [19:34<02:49,  3.57it/s]

Epoch: 1, Loss: 3.0483899116516113


Processing epoch 00:  88%|████████▊ | 4246/4850 [19:35<02:48,  3.59it/s]

Epoch: 1, Loss: 3.6567325592041016


Processing epoch 00:  88%|████████▊ | 4247/4850 [19:35<02:47,  3.60it/s]

Epoch: 1, Loss: 2.9186172485351562


Processing epoch 00:  88%|████████▊ | 4248/4850 [19:35<02:47,  3.59it/s]

Epoch: 1, Loss: 2.145461082458496


Processing epoch 00:  88%|████████▊ | 4249/4850 [19:35<02:47,  3.60it/s]

Epoch: 1, Loss: 3.1945080757141113


Processing epoch 00:  88%|████████▊ | 4250/4850 [19:36<02:46,  3.61it/s]

Epoch: 1, Loss: 2.799386978149414


Processing epoch 00:  88%|████████▊ | 4251/4850 [19:36<02:45,  3.62it/s]

Epoch: 1, Loss: 3.0425219535827637


Processing epoch 00:  88%|████████▊ | 4252/4850 [19:36<02:45,  3.62it/s]

Epoch: 1, Loss: 2.6538243293762207


Processing epoch 00:  88%|████████▊ | 4253/4850 [19:36<02:45,  3.62it/s]

Epoch: 1, Loss: 2.9698071479797363


Processing epoch 00:  88%|████████▊ | 4254/4850 [19:37<02:44,  3.62it/s]

Epoch: 1, Loss: 2.9244604110717773


Processing epoch 00:  88%|████████▊ | 4255/4850 [19:37<02:45,  3.59it/s]

Epoch: 1, Loss: 3.490506887435913


Processing epoch 00:  88%|████████▊ | 4256/4850 [19:37<02:45,  3.59it/s]

Epoch: 1, Loss: 2.689441680908203


Processing epoch 00:  88%|████████▊ | 4257/4850 [19:38<02:44,  3.60it/s]

Epoch: 1, Loss: 2.960866928100586


Processing epoch 00:  88%|████████▊ | 4258/4850 [19:38<02:45,  3.59it/s]

Epoch: 1, Loss: 2.996633529663086


Processing epoch 00:  88%|████████▊ | 4259/4850 [19:38<02:45,  3.58it/s]

Epoch: 1, Loss: 3.3927226066589355


Processing epoch 00:  88%|████████▊ | 4260/4850 [19:38<02:43,  3.60it/s]

Epoch: 1, Loss: 3.405508041381836


Processing epoch 00:  88%|████████▊ | 4261/4850 [19:39<02:43,  3.60it/s]

Epoch: 1, Loss: 2.3885574340820312


Processing epoch 00:  88%|████████▊ | 4262/4850 [19:39<02:42,  3.61it/s]

Epoch: 1, Loss: 2.832756757736206


Processing epoch 00:  88%|████████▊ | 4263/4850 [19:39<02:42,  3.62it/s]

Epoch: 1, Loss: 3.0483264923095703


Processing epoch 00:  88%|████████▊ | 4264/4850 [19:40<02:41,  3.62it/s]

Epoch: 1, Loss: 2.783641815185547


Processing epoch 00:  88%|████████▊ | 4265/4850 [19:40<02:41,  3.62it/s]

Epoch: 1, Loss: 3.0953142642974854


Processing epoch 00:  88%|████████▊ | 4266/4850 [19:40<02:41,  3.61it/s]

Epoch: 1, Loss: 3.402843475341797


Processing epoch 00:  88%|████████▊ | 4267/4850 [19:40<02:41,  3.60it/s]

Epoch: 1, Loss: 3.7348618507385254


Processing epoch 00:  88%|████████▊ | 4268/4850 [19:41<02:41,  3.60it/s]

Epoch: 1, Loss: 3.0183305740356445


Processing epoch 00:  88%|████████▊ | 4269/4850 [19:41<02:42,  3.58it/s]

Epoch: 1, Loss: 3.2828431129455566


Processing epoch 00:  88%|████████▊ | 4270/4850 [19:41<02:42,  3.57it/s]

Epoch: 1, Loss: 3.2686142921447754


Processing epoch 00:  88%|████████▊ | 4271/4850 [19:41<02:41,  3.59it/s]

Epoch: 1, Loss: 2.6978070735931396


Processing epoch 00:  88%|████████▊ | 4272/4850 [19:42<02:40,  3.60it/s]

Epoch: 1, Loss: 3.419821262359619


Processing epoch 00:  88%|████████▊ | 4273/4850 [19:42<02:44,  3.50it/s]

Epoch: 1, Loss: 2.746309757232666


Processing epoch 00:  88%|████████▊ | 4274/4850 [19:42<02:45,  3.47it/s]

Epoch: 1, Loss: 2.8698959350585938


Processing epoch 00:  88%|████████▊ | 4275/4850 [19:43<02:44,  3.49it/s]

Epoch: 1, Loss: 2.3579344749450684


Processing epoch 00:  88%|████████▊ | 4276/4850 [19:43<02:44,  3.50it/s]

Epoch: 1, Loss: 3.8668553829193115


Processing epoch 00:  88%|████████▊ | 4277/4850 [19:43<02:47,  3.43it/s]

Epoch: 1, Loss: 2.484487295150757


Processing epoch 00:  88%|████████▊ | 4278/4850 [19:44<02:45,  3.45it/s]

Epoch: 1, Loss: 2.886324644088745


Processing epoch 00:  88%|████████▊ | 4279/4850 [19:44<02:46,  3.43it/s]

Epoch: 1, Loss: 2.4540905952453613


Processing epoch 00:  88%|████████▊ | 4280/4850 [19:44<02:48,  3.39it/s]

Epoch: 1, Loss: 3.1872639656066895


Processing epoch 00:  88%|████████▊ | 4281/4850 [19:44<02:49,  3.36it/s]

Epoch: 1, Loss: 3.4788382053375244


Processing epoch 00:  88%|████████▊ | 4282/4850 [19:45<02:51,  3.32it/s]

Epoch: 1, Loss: 2.5045418739318848


Processing epoch 00:  88%|████████▊ | 4283/4850 [19:45<02:52,  3.29it/s]

Epoch: 1, Loss: 3.2793169021606445


Processing epoch 00:  88%|████████▊ | 4284/4850 [19:45<02:52,  3.29it/s]

Epoch: 1, Loss: 2.8812098503112793


Processing epoch 00:  88%|████████▊ | 4285/4850 [19:46<02:46,  3.39it/s]

Epoch: 1, Loss: 3.3731112480163574


Processing epoch 00:  88%|████████▊ | 4286/4850 [19:46<02:43,  3.45it/s]

Epoch: 1, Loss: 2.849700450897217


Processing epoch 00:  88%|████████▊ | 4287/4850 [19:46<02:40,  3.51it/s]

Epoch: 1, Loss: 4.114389419555664


Processing epoch 00:  88%|████████▊ | 4288/4850 [19:46<02:39,  3.52it/s]

Epoch: 1, Loss: 4.294829368591309


Processing epoch 00:  88%|████████▊ | 4289/4850 [19:47<02:37,  3.56it/s]

Epoch: 1, Loss: 3.1828174591064453


Processing epoch 00:  88%|████████▊ | 4290/4850 [19:47<02:36,  3.58it/s]

Epoch: 1, Loss: 3.0658345222473145


Processing epoch 00:  88%|████████▊ | 4291/4850 [19:47<02:36,  3.58it/s]

Epoch: 1, Loss: 2.3548059463500977


Processing epoch 00:  88%|████████▊ | 4292/4850 [19:48<02:34,  3.60it/s]

Epoch: 1, Loss: 2.8996729850769043


Processing epoch 00:  89%|████████▊ | 4293/4850 [19:48<02:34,  3.61it/s]

Epoch: 1, Loss: 4.121872425079346


Processing epoch 00:  89%|████████▊ | 4294/4850 [19:48<02:33,  3.62it/s]

Epoch: 1, Loss: 4.266880989074707


Processing epoch 00:  89%|████████▊ | 4295/4850 [19:48<02:32,  3.63it/s]

Epoch: 1, Loss: 2.9323606491088867


Processing epoch 00:  89%|████████▊ | 4296/4850 [19:49<02:33,  3.62it/s]

Epoch: 1, Loss: 2.5487422943115234


Processing epoch 00:  89%|████████▊ | 4297/4850 [19:49<02:32,  3.62it/s]

Epoch: 1, Loss: 2.9209389686584473


Processing epoch 00:  89%|████████▊ | 4298/4850 [19:49<02:32,  3.63it/s]

Epoch: 1, Loss: 3.3600118160247803


Processing epoch 00:  89%|████████▊ | 4299/4850 [19:49<02:32,  3.60it/s]

Epoch: 1, Loss: 2.9477577209472656


Processing epoch 00:  89%|████████▊ | 4300/4850 [19:50<02:31,  3.62it/s]

Epoch: 1, Loss: 3.0017929077148438


Processing epoch 00:  89%|████████▊ | 4301/4850 [19:50<02:31,  3.62it/s]

Epoch: 1, Loss: 2.761314868927002


Processing epoch 00:  89%|████████▊ | 4302/4850 [19:50<02:31,  3.62it/s]

Epoch: 1, Loss: 2.509941577911377


Processing epoch 00:  89%|████████▊ | 4303/4850 [19:51<02:30,  3.63it/s]

Epoch: 1, Loss: 2.9021079540252686


Processing epoch 00:  89%|████████▊ | 4304/4850 [19:51<02:31,  3.61it/s]

Epoch: 1, Loss: 3.6116528511047363


Processing epoch 00:  89%|████████▉ | 4305/4850 [19:51<02:31,  3.60it/s]

Epoch: 1, Loss: 2.445389747619629


Processing epoch 00:  89%|████████▉ | 4306/4850 [19:51<02:31,  3.60it/s]

Epoch: 1, Loss: 2.591740846633911


Processing epoch 00:  89%|████████▉ | 4307/4850 [19:52<02:31,  3.59it/s]

Epoch: 1, Loss: 2.4085021018981934


Processing epoch 00:  89%|████████▉ | 4308/4850 [19:52<02:31,  3.59it/s]

Epoch: 1, Loss: 2.917072296142578


Processing epoch 00:  89%|████████▉ | 4309/4850 [19:52<02:30,  3.59it/s]

Epoch: 1, Loss: 2.6947662830352783


Processing epoch 00:  89%|████████▉ | 4310/4850 [19:53<02:30,  3.60it/s]

Epoch: 1, Loss: 2.7871623039245605


Processing epoch 00:  89%|████████▉ | 4311/4850 [19:53<02:28,  3.62it/s]

Epoch: 1, Loss: 2.568845748901367


Processing epoch 00:  89%|████████▉ | 4312/4850 [19:53<02:29,  3.61it/s]

Epoch: 1, Loss: 2.060288906097412


Processing epoch 00:  89%|████████▉ | 4313/4850 [19:53<02:28,  3.61it/s]

Epoch: 1, Loss: 3.63104248046875


Processing epoch 00:  89%|████████▉ | 4314/4850 [19:54<02:29,  3.59it/s]

Epoch: 1, Loss: 3.0228078365325928


Processing epoch 00:  89%|████████▉ | 4315/4850 [19:54<02:28,  3.61it/s]

Epoch: 1, Loss: 3.922187328338623


Processing epoch 00:  89%|████████▉ | 4316/4850 [19:54<02:28,  3.59it/s]

Epoch: 1, Loss: 3.487208843231201


Processing epoch 00:  89%|████████▉ | 4317/4850 [19:54<02:27,  3.61it/s]

Epoch: 1, Loss: 3.1155290603637695


Processing epoch 00:  89%|████████▉ | 4318/4850 [19:55<02:27,  3.61it/s]

Epoch: 1, Loss: 3.1542885303497314


Processing epoch 00:  89%|████████▉ | 4319/4850 [19:55<02:27,  3.61it/s]

Epoch: 1, Loss: 3.5136122703552246


Processing epoch 00:  89%|████████▉ | 4320/4850 [19:55<02:26,  3.62it/s]

Epoch: 1, Loss: 4.007576942443848


Processing epoch 00:  89%|████████▉ | 4321/4850 [19:56<02:29,  3.54it/s]

Epoch: 1, Loss: 2.9974265098571777


Processing epoch 00:  89%|████████▉ | 4322/4850 [19:56<02:31,  3.49it/s]

Epoch: 1, Loss: 3.23441219329834


Processing epoch 00:  89%|████████▉ | 4323/4850 [19:56<02:32,  3.45it/s]

Epoch: 1, Loss: 3.1165966987609863


Processing epoch 00:  89%|████████▉ | 4324/4850 [19:56<02:32,  3.45it/s]

Epoch: 1, Loss: 2.9651241302490234


Processing epoch 00:  89%|████████▉ | 4325/4850 [19:57<02:33,  3.42it/s]

Epoch: 1, Loss: 2.7167768478393555


Processing epoch 00:  89%|████████▉ | 4326/4850 [19:57<02:34,  3.39it/s]

Epoch: 1, Loss: 2.717519521713257


Processing epoch 00:  89%|████████▉ | 4327/4850 [19:57<02:36,  3.35it/s]

Epoch: 1, Loss: 4.243025779724121


Processing epoch 00:  89%|████████▉ | 4328/4850 [19:58<02:35,  3.35it/s]

Epoch: 1, Loss: 3.287777900695801


Processing epoch 00:  89%|████████▉ | 4329/4850 [19:58<02:34,  3.37it/s]

Epoch: 1, Loss: 2.7997937202453613


Processing epoch 00:  89%|████████▉ | 4330/4850 [19:58<02:34,  3.36it/s]

Epoch: 1, Loss: 3.197523593902588


Processing epoch 00:  89%|████████▉ | 4331/4850 [19:59<02:34,  3.36it/s]

Epoch: 1, Loss: 2.841512680053711


Processing epoch 00:  89%|████████▉ | 4332/4850 [19:59<02:33,  3.36it/s]

Epoch: 1, Loss: 3.2738237380981445


Processing epoch 00:  89%|████████▉ | 4333/4850 [19:59<02:29,  3.45it/s]

Epoch: 1, Loss: 3.3489878177642822


Processing epoch 00:  89%|████████▉ | 4334/4850 [19:59<02:27,  3.49it/s]

Epoch: 1, Loss: 2.9993791580200195


Processing epoch 00:  89%|████████▉ | 4335/4850 [20:00<02:26,  3.52it/s]

Epoch: 1, Loss: 3.925384521484375


Processing epoch 00:  89%|████████▉ | 4336/4850 [20:00<02:25,  3.53it/s]

Epoch: 1, Loss: 2.421909809112549


Processing epoch 00:  89%|████████▉ | 4337/4850 [20:00<02:24,  3.55it/s]

Epoch: 1, Loss: 2.5775146484375


Processing epoch 00:  89%|████████▉ | 4338/4850 [20:01<02:23,  3.57it/s]

Epoch: 1, Loss: 3.099574565887451


Processing epoch 00:  89%|████████▉ | 4339/4850 [20:01<02:23,  3.56it/s]

Epoch: 1, Loss: 2.917802333831787


Processing epoch 00:  89%|████████▉ | 4340/4850 [20:01<02:22,  3.58it/s]

Epoch: 1, Loss: 4.2700090408325195


Processing epoch 00:  90%|████████▉ | 4341/4850 [20:01<02:21,  3.59it/s]

Epoch: 1, Loss: 2.8336925506591797


Processing epoch 00:  90%|████████▉ | 4342/4850 [20:02<02:21,  3.59it/s]

Epoch: 1, Loss: 3.065047264099121


Processing epoch 00:  90%|████████▉ | 4343/4850 [20:02<02:22,  3.55it/s]

Epoch: 1, Loss: 2.716219186782837


Processing epoch 00:  90%|████████▉ | 4344/4850 [20:02<02:25,  3.49it/s]

Epoch: 1, Loss: 2.9223344326019287


Processing epoch 00:  90%|████████▉ | 4345/4850 [20:03<02:24,  3.50it/s]

Epoch: 1, Loss: 3.069549560546875


Processing epoch 00:  90%|████████▉ | 4346/4850 [20:03<02:23,  3.50it/s]

Epoch: 1, Loss: 4.004395961761475


Processing epoch 00:  90%|████████▉ | 4347/4850 [20:03<02:24,  3.48it/s]

Epoch: 1, Loss: 3.0784733295440674


Processing epoch 00:  90%|████████▉ | 4348/4850 [20:03<02:23,  3.49it/s]

Epoch: 1, Loss: 2.8411879539489746


Processing epoch 00:  90%|████████▉ | 4349/4850 [20:04<02:23,  3.49it/s]

Epoch: 1, Loss: 2.6294217109680176


Processing epoch 00:  90%|████████▉ | 4350/4850 [20:04<02:22,  3.52it/s]

Epoch: 1, Loss: 3.7346556186676025


Processing epoch 00:  90%|████████▉ | 4351/4850 [20:04<02:23,  3.47it/s]

Epoch: 1, Loss: 2.6127243041992188


Processing epoch 00:  90%|████████▉ | 4352/4850 [20:05<02:26,  3.40it/s]

Epoch: 1, Loss: 2.804340362548828


Processing epoch 00:  90%|████████▉ | 4353/4850 [20:05<02:24,  3.43it/s]

Epoch: 1, Loss: 2.9867289066314697


Processing epoch 00:  90%|████████▉ | 4354/4850 [20:05<02:24,  3.42it/s]

Epoch: 1, Loss: 4.21475076675415


Processing epoch 00:  90%|████████▉ | 4355/4850 [20:05<02:23,  3.44it/s]

Epoch: 1, Loss: 2.919985294342041


Processing epoch 00:  90%|████████▉ | 4356/4850 [20:06<02:26,  3.36it/s]

Epoch: 1, Loss: 2.685853958129883


Processing epoch 00:  90%|████████▉ | 4357/4850 [20:06<02:24,  3.41it/s]

Epoch: 1, Loss: 2.9511780738830566


Processing epoch 00:  90%|████████▉ | 4358/4850 [20:06<02:22,  3.46it/s]

Epoch: 1, Loss: 2.2695271968841553


Processing epoch 00:  90%|████████▉ | 4359/4850 [20:07<02:19,  3.51it/s]

Epoch: 1, Loss: 2.648399829864502


Processing epoch 00:  90%|████████▉ | 4360/4850 [20:07<02:18,  3.54it/s]

Epoch: 1, Loss: 2.9522476196289062


Processing epoch 00:  90%|████████▉ | 4361/4850 [20:07<02:16,  3.57it/s]

Epoch: 1, Loss: 3.1956734657287598


Processing epoch 00:  90%|████████▉ | 4362/4850 [20:07<02:16,  3.56it/s]

Epoch: 1, Loss: 2.6992013454437256


Processing epoch 00:  90%|████████▉ | 4363/4850 [20:08<02:16,  3.57it/s]

Epoch: 1, Loss: 3.262340545654297


Processing epoch 00:  90%|████████▉ | 4364/4850 [20:08<02:15,  3.58it/s]

Epoch: 1, Loss: 3.022085428237915


Processing epoch 00:  90%|█████████ | 4365/4850 [20:08<02:16,  3.57it/s]

Epoch: 1, Loss: 2.7878808975219727


Processing epoch 00:  90%|█████████ | 4366/4850 [20:09<02:15,  3.58it/s]

Epoch: 1, Loss: 2.1139068603515625


Processing epoch 00:  90%|█████████ | 4367/4850 [20:09<02:14,  3.58it/s]

Epoch: 1, Loss: 3.5231170654296875


Processing epoch 00:  90%|█████████ | 4368/4850 [20:09<02:17,  3.51it/s]

Epoch: 1, Loss: 2.989290714263916


Processing epoch 00:  90%|█████████ | 4369/4850 [20:09<02:19,  3.45it/s]

Epoch: 1, Loss: 2.6801443099975586


Processing epoch 00:  90%|█████████ | 4370/4850 [20:10<02:17,  3.49it/s]

Epoch: 1, Loss: 3.1709327697753906


Processing epoch 00:  90%|█████████ | 4371/4850 [20:10<02:18,  3.46it/s]

Epoch: 1, Loss: 2.8801474571228027


Processing epoch 00:  90%|█████████ | 4372/4850 [20:10<02:17,  3.48it/s]

Epoch: 1, Loss: 2.9949541091918945


Processing epoch 00:  90%|█████████ | 4373/4850 [20:11<02:18,  3.45it/s]

Epoch: 1, Loss: 3.2252073287963867


Processing epoch 00:  90%|█████████ | 4374/4850 [20:11<02:19,  3.41it/s]

Epoch: 1, Loss: 3.212573528289795


Processing epoch 00:  90%|█████████ | 4375/4850 [20:11<02:18,  3.43it/s]

Epoch: 1, Loss: 2.8056020736694336


Processing epoch 00:  90%|█████████ | 4376/4850 [20:11<02:19,  3.40it/s]

Epoch: 1, Loss: 2.937685012817383


Processing epoch 00:  90%|█████████ | 4377/4850 [20:12<02:18,  3.41it/s]

Epoch: 1, Loss: 2.3541247844696045


Processing epoch 00:  90%|█████████ | 4378/4850 [20:12<02:17,  3.43it/s]

Epoch: 1, Loss: 2.665739059448242


Processing epoch 00:  90%|█████████ | 4379/4850 [20:12<02:17,  3.42it/s]

Epoch: 1, Loss: 3.0610976219177246


Processing epoch 00:  90%|█████████ | 4380/4850 [20:13<02:15,  3.46it/s]

Epoch: 1, Loss: 3.206873893737793


Processing epoch 00:  90%|█████████ | 4381/4850 [20:13<02:14,  3.50it/s]

Epoch: 1, Loss: 3.5006728172302246


Processing epoch 00:  90%|█████████ | 4382/4850 [20:13<02:12,  3.53it/s]

Epoch: 1, Loss: 3.16829776763916


Processing epoch 00:  90%|█████████ | 4383/4850 [20:13<02:13,  3.51it/s]

Epoch: 1, Loss: 3.849149227142334


Processing epoch 00:  90%|█████████ | 4384/4850 [20:14<02:11,  3.54it/s]

Epoch: 1, Loss: 2.49961519241333


Processing epoch 00:  90%|█████████ | 4385/4850 [20:14<02:11,  3.55it/s]

Epoch: 1, Loss: 2.7634990215301514


Processing epoch 00:  90%|█████████ | 4386/4850 [20:14<02:09,  3.58it/s]

Epoch: 1, Loss: 4.210114002227783


Processing epoch 00:  90%|█████████ | 4387/4850 [20:15<02:09,  3.58it/s]

Epoch: 1, Loss: 2.544102668762207


Processing epoch 00:  90%|█████████ | 4388/4850 [20:15<02:09,  3.58it/s]

Epoch: 1, Loss: 2.6098036766052246


Processing epoch 00:  90%|█████████ | 4389/4850 [20:15<02:08,  3.59it/s]

Epoch: 1, Loss: 3.6460165977478027


Processing epoch 00:  91%|█████████ | 4390/4850 [20:15<02:07,  3.60it/s]

Epoch: 1, Loss: 3.1612801551818848


Processing epoch 00:  91%|█████████ | 4391/4850 [20:16<02:07,  3.59it/s]

Epoch: 1, Loss: 2.8665828704833984


Processing epoch 00:  91%|█████████ | 4392/4850 [20:16<02:07,  3.60it/s]

Epoch: 1, Loss: 2.432051181793213


Processing epoch 00:  91%|█████████ | 4393/4850 [20:16<02:07,  3.59it/s]

Epoch: 1, Loss: 2.853745222091675


Processing epoch 00:  91%|█████████ | 4394/4850 [20:16<02:07,  3.59it/s]

Epoch: 1, Loss: 2.7490410804748535


Processing epoch 00:  91%|█████████ | 4395/4850 [20:17<02:06,  3.60it/s]

Epoch: 1, Loss: 3.044400215148926


Processing epoch 00:  91%|█████████ | 4396/4850 [20:17<02:06,  3.59it/s]

Epoch: 1, Loss: 3.186753273010254


Processing epoch 00:  91%|█████████ | 4397/4850 [20:17<02:05,  3.61it/s]

Epoch: 1, Loss: 3.4594650268554688


Processing epoch 00:  91%|█████████ | 4398/4850 [20:18<02:06,  3.58it/s]

Epoch: 1, Loss: 2.792281150817871


Processing epoch 00:  91%|█████████ | 4399/4850 [20:18<02:05,  3.58it/s]

Epoch: 1, Loss: 2.5566976070404053


Processing epoch 00:  91%|█████████ | 4400/4850 [20:18<02:05,  3.59it/s]

Epoch: 1, Loss: 2.9081969261169434


Processing epoch 00:  91%|█████████ | 4401/4850 [20:18<02:04,  3.60it/s]

Epoch: 1, Loss: 3.093494415283203


Processing epoch 00:  91%|█████████ | 4402/4850 [20:19<02:04,  3.59it/s]

Epoch: 1, Loss: 2.7008447647094727


Processing epoch 00:  91%|█████████ | 4403/4850 [20:19<02:04,  3.60it/s]

Epoch: 1, Loss: 3.1198294162750244


Processing epoch 00:  91%|█████████ | 4404/4850 [20:19<02:03,  3.61it/s]

Epoch: 1, Loss: 3.0782647132873535


Processing epoch 00:  91%|█████████ | 4405/4850 [20:20<02:03,  3.60it/s]

Epoch: 1, Loss: 3.0802721977233887


Processing epoch 00:  91%|█████████ | 4406/4850 [20:20<02:03,  3.61it/s]

Epoch: 1, Loss: 2.7953758239746094


Processing epoch 00:  91%|█████████ | 4407/4850 [20:20<02:02,  3.62it/s]

Epoch: 1, Loss: 2.875439167022705


Processing epoch 00:  91%|█████████ | 4408/4850 [20:20<02:01,  3.62it/s]

Epoch: 1, Loss: 2.5515623092651367


Processing epoch 00:  91%|█████████ | 4409/4850 [20:21<02:03,  3.58it/s]

Epoch: 1, Loss: 2.7404847145080566


Processing epoch 00:  91%|█████████ | 4410/4850 [20:21<02:02,  3.58it/s]

Epoch: 1, Loss: 3.089043617248535


Processing epoch 00:  91%|█████████ | 4411/4850 [20:21<02:01,  3.60it/s]

Epoch: 1, Loss: 3.8741354942321777


Processing epoch 00:  91%|█████████ | 4412/4850 [20:21<02:01,  3.60it/s]

Epoch: 1, Loss: 2.722203493118286


Processing epoch 00:  91%|█████████ | 4413/4850 [20:22<02:02,  3.58it/s]

Epoch: 1, Loss: 2.703190326690674


Processing epoch 00:  91%|█████████ | 4414/4850 [20:22<02:02,  3.56it/s]

Epoch: 1, Loss: 2.878757953643799


Processing epoch 00:  91%|█████████ | 4415/4850 [20:22<02:01,  3.58it/s]

Epoch: 1, Loss: 3.262988567352295


Processing epoch 00:  91%|█████████ | 4416/4850 [20:23<02:03,  3.53it/s]

Epoch: 1, Loss: 3.120366334915161


Processing epoch 00:  91%|█████████ | 4417/4850 [20:23<02:05,  3.46it/s]

Epoch: 1, Loss: 2.772448778152466


Processing epoch 00:  91%|█████████ | 4418/4850 [20:23<02:04,  3.47it/s]

Epoch: 1, Loss: 2.5312986373901367


Processing epoch 00:  91%|█████████ | 4419/4850 [20:24<02:03,  3.49it/s]

Epoch: 1, Loss: 2.9990453720092773


Processing epoch 00:  91%|█████████ | 4420/4850 [20:24<02:04,  3.45it/s]

Epoch: 1, Loss: 2.7013707160949707


Processing epoch 00:  91%|█████████ | 4421/4850 [20:24<02:07,  3.38it/s]

Epoch: 1, Loss: 3.08896541595459


Processing epoch 00:  91%|█████████ | 4422/4850 [20:24<02:05,  3.41it/s]

Epoch: 1, Loss: 3.3020524978637695


Processing epoch 00:  91%|█████████ | 4423/4850 [20:25<02:04,  3.44it/s]

Epoch: 1, Loss: 3.018655300140381


Processing epoch 00:  91%|█████████ | 4424/4850 [20:25<02:05,  3.41it/s]

Epoch: 1, Loss: 2.819110631942749


Processing epoch 00:  91%|█████████ | 4425/4850 [20:25<02:05,  3.39it/s]

Epoch: 1, Loss: 2.882171392440796


Processing epoch 00:  91%|█████████▏| 4426/4850 [20:26<02:05,  3.37it/s]

Epoch: 1, Loss: 3.0163397789001465


Processing epoch 00:  91%|█████████▏| 4427/4850 [20:26<02:06,  3.35it/s]

Epoch: 1, Loss: 4.411332607269287


Processing epoch 00:  91%|█████████▏| 4428/4850 [20:26<02:02,  3.43it/s]

Epoch: 1, Loss: 3.0472512245178223


Processing epoch 00:  91%|█████████▏| 4429/4850 [20:26<02:00,  3.48it/s]

Epoch: 1, Loss: 3.310703754425049


Processing epoch 00:  91%|█████████▏| 4430/4850 [20:27<01:59,  3.50it/s]

Epoch: 1, Loss: 2.8257193565368652


Processing epoch 00:  91%|█████████▏| 4431/4850 [20:27<01:58,  3.52it/s]

Epoch: 1, Loss: 2.9339590072631836


Processing epoch 00:  91%|█████████▏| 4432/4850 [20:27<01:57,  3.54it/s]

Epoch: 1, Loss: 3.118572950363159


Processing epoch 00:  91%|█████████▏| 4433/4850 [20:28<01:57,  3.56it/s]

Epoch: 1, Loss: 2.7902965545654297


Processing epoch 00:  91%|█████████▏| 4434/4850 [20:28<01:56,  3.56it/s]

Epoch: 1, Loss: 3.629870653152466


Processing epoch 00:  91%|█████████▏| 4435/4850 [20:28<01:55,  3.58it/s]

Epoch: 1, Loss: 2.325331687927246


Processing epoch 00:  91%|█████████▏| 4436/4850 [20:28<01:55,  3.59it/s]

Epoch: 1, Loss: 2.532158851623535


Processing epoch 00:  91%|█████████▏| 4437/4850 [20:29<01:54,  3.59it/s]

Epoch: 1, Loss: 3.5892200469970703


Processing epoch 00:  92%|█████████▏| 4438/4850 [20:29<01:55,  3.56it/s]

Epoch: 1, Loss: 3.5774927139282227


Processing epoch 00:  92%|█████████▏| 4439/4850 [20:29<01:55,  3.57it/s]

Epoch: 1, Loss: 3.0610554218292236


Processing epoch 00:  92%|█████████▏| 4440/4850 [20:30<01:54,  3.57it/s]

Epoch: 1, Loss: 3.205671787261963


Processing epoch 00:  92%|█████████▏| 4441/4850 [20:30<01:54,  3.58it/s]

Epoch: 1, Loss: 3.1329538822174072


Processing epoch 00:  92%|█████████▏| 4442/4850 [20:30<01:54,  3.57it/s]

Epoch: 1, Loss: 2.7135062217712402


Processing epoch 00:  92%|█████████▏| 4443/4850 [20:30<01:54,  3.56it/s]

Epoch: 1, Loss: 3.1141510009765625


Processing epoch 00:  92%|█████████▏| 4444/4850 [20:31<01:53,  3.58it/s]

Epoch: 1, Loss: 3.794424057006836


Processing epoch 00:  92%|█████████▏| 4445/4850 [20:31<01:53,  3.57it/s]

Epoch: 1, Loss: 2.6635959148406982


Processing epoch 00:  92%|█████████▏| 4446/4850 [20:31<01:52,  3.59it/s]

Epoch: 1, Loss: 2.9260659217834473


Processing epoch 00:  92%|█████████▏| 4447/4850 [20:31<01:51,  3.61it/s]

Epoch: 1, Loss: 2.7636146545410156


Processing epoch 00:  92%|█████████▏| 4448/4850 [20:32<01:51,  3.61it/s]

Epoch: 1, Loss: 2.8279976844787598


Processing epoch 00:  92%|█████████▏| 4449/4850 [20:32<01:52,  3.57it/s]

Epoch: 1, Loss: 2.8717780113220215


Processing epoch 00:  92%|█████████▏| 4450/4850 [20:32<01:51,  3.59it/s]

Epoch: 1, Loss: 2.860694169998169


Processing epoch 00:  92%|█████████▏| 4451/4850 [20:33<01:51,  3.59it/s]

Epoch: 1, Loss: 3.0825085639953613


Processing epoch 00:  92%|█████████▏| 4452/4850 [20:33<01:51,  3.58it/s]

Epoch: 1, Loss: 2.967716693878174


Processing epoch 00:  92%|█████████▏| 4453/4850 [20:33<01:52,  3.53it/s]

Epoch: 1, Loss: 2.5952324867248535


Processing epoch 00:  92%|█████████▏| 4454/4850 [20:33<01:51,  3.56it/s]

Epoch: 1, Loss: 2.6569740772247314


Processing epoch 00:  92%|█████████▏| 4455/4850 [20:34<01:50,  3.57it/s]

Epoch: 1, Loss: 3.2733583450317383


Processing epoch 00:  92%|█████████▏| 4456/4850 [20:34<01:50,  3.58it/s]

Epoch: 1, Loss: 3.040130138397217


Processing epoch 00:  92%|█████████▏| 4457/4850 [20:34<01:49,  3.59it/s]

Epoch: 1, Loss: 2.4752871990203857


Processing epoch 00:  92%|█████████▏| 4458/4850 [20:35<01:49,  3.59it/s]

Epoch: 1, Loss: 3.2989001274108887


Processing epoch 00:  92%|█████████▏| 4459/4850 [20:35<01:48,  3.61it/s]

Epoch: 1, Loss: 3.5788683891296387


Processing epoch 00:  92%|█████████▏| 4460/4850 [20:35<01:48,  3.60it/s]

Epoch: 1, Loss: 3.456362724304199


Processing epoch 00:  92%|█████████▏| 4461/4850 [20:35<01:48,  3.59it/s]

Epoch: 1, Loss: 2.8390626907348633


Processing epoch 00:  92%|█████████▏| 4462/4850 [20:36<01:48,  3.59it/s]

Epoch: 1, Loss: 2.894286632537842


Processing epoch 00:  92%|█████████▏| 4463/4850 [20:36<01:47,  3.61it/s]

Epoch: 1, Loss: 3.043896198272705


Processing epoch 00:  92%|█████████▏| 4464/4850 [20:36<01:49,  3.52it/s]

Epoch: 1, Loss: 3.740018367767334


Processing epoch 00:  92%|█████████▏| 4465/4850 [20:37<01:50,  3.49it/s]

Epoch: 1, Loss: 2.8601016998291016


Processing epoch 00:  92%|█████████▏| 4466/4850 [20:37<01:49,  3.52it/s]

Epoch: 1, Loss: 2.8079605102539062


Processing epoch 00:  92%|█████████▏| 4467/4850 [20:37<01:49,  3.49it/s]

Epoch: 1, Loss: 2.370967388153076


Processing epoch 00:  92%|█████████▏| 4468/4850 [20:37<01:48,  3.52it/s]

Epoch: 1, Loss: 2.987680435180664


Processing epoch 00:  92%|█████████▏| 4469/4850 [20:38<01:50,  3.45it/s]

Epoch: 1, Loss: 3.1815638542175293


Processing epoch 00:  92%|█████████▏| 4470/4850 [20:38<01:51,  3.40it/s]

Epoch: 1, Loss: 2.7798562049865723


Processing epoch 00:  92%|█████████▏| 4471/4850 [20:38<01:51,  3.41it/s]

Epoch: 1, Loss: 3.107880115509033


Processing epoch 00:  92%|█████████▏| 4472/4850 [20:39<01:52,  3.37it/s]

Epoch: 1, Loss: 3.2727222442626953


Processing epoch 00:  92%|█████████▏| 4473/4850 [20:39<01:51,  3.39it/s]

Epoch: 1, Loss: 2.5951130390167236


Processing epoch 00:  92%|█████████▏| 4474/4850 [20:39<01:52,  3.35it/s]

Epoch: 1, Loss: 2.969134569168091


Processing epoch 00:  92%|█████████▏| 4475/4850 [20:39<01:52,  3.33it/s]

Epoch: 1, Loss: 2.607341766357422


Processing epoch 00:  92%|█████████▏| 4476/4850 [20:40<01:50,  3.40it/s]

Epoch: 1, Loss: 2.531080722808838


Processing epoch 00:  92%|█████████▏| 4477/4850 [20:40<01:47,  3.46it/s]

Epoch: 1, Loss: 2.866133689880371


Processing epoch 00:  92%|█████████▏| 4478/4850 [20:40<01:46,  3.51it/s]

Epoch: 1, Loss: 2.642583131790161


Processing epoch 00:  92%|█████████▏| 4479/4850 [20:41<01:44,  3.53it/s]

Epoch: 1, Loss: 2.9407877922058105


Processing epoch 00:  92%|█████████▏| 4480/4850 [20:41<01:43,  3.56it/s]

Epoch: 1, Loss: 3.288743495941162


Processing epoch 00:  92%|█████████▏| 4481/4850 [20:41<01:43,  3.57it/s]

Epoch: 1, Loss: 3.2052621841430664


Processing epoch 00:  92%|█████████▏| 4482/4850 [20:41<01:42,  3.59it/s]

Epoch: 1, Loss: 3.3022265434265137


Processing epoch 00:  92%|█████████▏| 4483/4850 [20:42<01:41,  3.60it/s]

Epoch: 1, Loss: 3.9932918548583984


Processing epoch 00:  92%|█████████▏| 4484/4850 [20:42<01:41,  3.61it/s]

Epoch: 1, Loss: 3.844517946243286


Processing epoch 00:  92%|█████████▏| 4485/4850 [20:42<01:41,  3.61it/s]

Epoch: 1, Loss: 3.1473708152770996


Processing epoch 00:  92%|█████████▏| 4486/4850 [20:43<01:41,  3.57it/s]

Epoch: 1, Loss: 3.303429126739502


Processing epoch 00:  93%|█████████▎| 4487/4850 [20:43<01:41,  3.58it/s]

Epoch: 1, Loss: 2.861274003982544


Processing epoch 00:  93%|█████████▎| 4488/4850 [20:43<01:42,  3.54it/s]

Epoch: 1, Loss: 2.930649995803833


Processing epoch 00:  93%|█████████▎| 4489/4850 [20:43<01:41,  3.56it/s]

Epoch: 1, Loss: 3.1117916107177734


Processing epoch 00:  93%|█████████▎| 4490/4850 [20:44<01:40,  3.57it/s]

Epoch: 1, Loss: 3.117732286453247


Processing epoch 00:  93%|█████████▎| 4491/4850 [20:44<01:40,  3.57it/s]

Epoch: 1, Loss: 2.883547782897949


Processing epoch 00:  93%|█████████▎| 4492/4850 [20:44<01:39,  3.60it/s]

Epoch: 1, Loss: 3.89751935005188


Processing epoch 00:  93%|█████████▎| 4493/4850 [20:44<01:39,  3.60it/s]

Epoch: 1, Loss: 3.386159896850586


Processing epoch 00:  93%|█████████▎| 4494/4850 [20:45<01:38,  3.60it/s]

Epoch: 1, Loss: 3.146272659301758


Processing epoch 00:  93%|█████████▎| 4495/4850 [20:45<01:38,  3.59it/s]

Epoch: 1, Loss: 2.814267873764038


Processing epoch 00:  93%|█████████▎| 4496/4850 [20:45<01:38,  3.60it/s]

Epoch: 1, Loss: 2.852323055267334


Processing epoch 00:  93%|█████████▎| 4497/4850 [20:46<01:38,  3.58it/s]

Epoch: 1, Loss: 2.295578956604004


Processing epoch 00:  93%|█████████▎| 4498/4850 [20:46<01:37,  3.59it/s]

Epoch: 1, Loss: 3.0043282508850098


Processing epoch 00:  93%|█████████▎| 4499/4850 [20:46<01:38,  3.58it/s]

Epoch: 1, Loss: 2.358829975128174


Processing epoch 00:  93%|█████████▎| 4500/4850 [20:46<01:37,  3.58it/s]

Epoch: 1, Loss: 3.190988779067993


Processing epoch 00:  93%|█████████▎| 4501/4850 [20:47<01:37,  3.59it/s]

Epoch: 1, Loss: 2.203594923019409


Processing epoch 00:  93%|█████████▎| 4502/4850 [20:47<01:37,  3.58it/s]

Epoch: 1, Loss: 2.535539150238037


Processing epoch 00:  93%|█████████▎| 4503/4850 [20:47<01:37,  3.57it/s]

Epoch: 1, Loss: 2.7852632999420166


Processing epoch 00:  93%|█████████▎| 4504/4850 [20:48<01:36,  3.59it/s]

Epoch: 1, Loss: 2.8095691204071045


Processing epoch 00:  93%|█████████▎| 4505/4850 [20:48<01:36,  3.59it/s]

Epoch: 1, Loss: 2.560154914855957


Processing epoch 00:  93%|█████████▎| 4506/4850 [20:48<01:35,  3.60it/s]

Epoch: 1, Loss: 2.8607559204101562


Processing epoch 00:  93%|█████████▎| 4507/4850 [20:48<01:35,  3.59it/s]

Epoch: 1, Loss: 2.9184937477111816


Processing epoch 00:  93%|█████████▎| 4508/4850 [20:49<01:35,  3.60it/s]

Epoch: 1, Loss: 3.155400037765503


Processing epoch 00:  93%|█████████▎| 4509/4850 [20:49<01:34,  3.62it/s]

Epoch: 1, Loss: 4.22759485244751


Processing epoch 00:  93%|█████████▎| 4510/4850 [20:49<01:33,  3.63it/s]

Epoch: 1, Loss: 3.7987682819366455


Processing epoch 00:  93%|█████████▎| 4511/4850 [20:49<01:33,  3.62it/s]

Epoch: 1, Loss: 2.8460540771484375


Processing epoch 00:  93%|█████████▎| 4512/4850 [20:50<01:36,  3.52it/s]

Epoch: 1, Loss: 3.0521512031555176


Processing epoch 00:  93%|█████████▎| 4513/4850 [20:50<01:37,  3.46it/s]

Epoch: 1, Loss: 2.6650216579437256


Processing epoch 00:  93%|█████████▎| 4514/4850 [20:50<01:37,  3.46it/s]

Epoch: 1, Loss: 3.088299512863159


Processing epoch 00:  93%|█████████▎| 4515/4850 [20:51<01:37,  3.44it/s]

Epoch: 1, Loss: 2.5400304794311523


Processing epoch 00:  93%|█████████▎| 4516/4850 [20:51<01:37,  3.42it/s]

Epoch: 1, Loss: 3.3784167766571045


Processing epoch 00:  93%|█████████▎| 4517/4850 [20:51<01:37,  3.42it/s]

Epoch: 1, Loss: 3.0109171867370605


Processing epoch 00:  93%|█████████▎| 4518/4850 [20:52<01:38,  3.38it/s]

Epoch: 1, Loss: 3.0303497314453125


Processing epoch 00:  93%|█████████▎| 4519/4850 [20:52<01:38,  3.36it/s]

Epoch: 1, Loss: 3.643810272216797


Processing epoch 00:  93%|█████████▎| 4520/4850 [20:52<01:39,  3.33it/s]

Epoch: 1, Loss: 3.6836981773376465


Processing epoch 00:  93%|█████████▎| 4521/4850 [20:52<01:37,  3.39it/s]

Epoch: 1, Loss: 3.406355857849121


Processing epoch 00:  93%|█████████▎| 4522/4850 [20:53<01:35,  3.43it/s]

Epoch: 1, Loss: 2.4305105209350586


Processing epoch 00:  93%|█████████▎| 4523/4850 [20:53<01:36,  3.38it/s]

Epoch: 1, Loss: 2.9051408767700195


Processing epoch 00:  93%|█████████▎| 4524/4850 [20:53<01:34,  3.44it/s]

Epoch: 1, Loss: 2.814183235168457


Processing epoch 00:  93%|█████████▎| 4525/4850 [20:54<01:33,  3.49it/s]

Epoch: 1, Loss: 3.211219310760498


Processing epoch 00:  93%|█████████▎| 4526/4850 [20:54<01:32,  3.51it/s]

Epoch: 1, Loss: 2.695582389831543


Processing epoch 00:  93%|█████████▎| 4527/4850 [20:54<01:31,  3.54it/s]

Epoch: 1, Loss: 3.4330716133117676


Processing epoch 00:  93%|█████████▎| 4528/4850 [20:54<01:30,  3.54it/s]

Epoch: 1, Loss: 2.825526237487793


Processing epoch 00:  93%|█████████▎| 4529/4850 [20:55<01:30,  3.54it/s]

Epoch: 1, Loss: 2.4750237464904785


Processing epoch 00:  93%|█████████▎| 4530/4850 [20:55<01:29,  3.56it/s]

Epoch: 1, Loss: 2.503251791000366


Processing epoch 00:  93%|█████████▎| 4531/4850 [20:55<01:29,  3.58it/s]

Epoch: 1, Loss: 2.791111946105957


Processing epoch 00:  93%|█████████▎| 4532/4850 [20:56<01:28,  3.59it/s]

Epoch: 1, Loss: 3.1856393814086914


Processing epoch 00:  93%|█████████▎| 4533/4850 [20:56<01:28,  3.59it/s]

Epoch: 1, Loss: 3.146406888961792


Processing epoch 00:  93%|█████████▎| 4534/4850 [20:56<01:27,  3.59it/s]

Epoch: 1, Loss: 3.142989158630371


Processing epoch 00:  94%|█████████▎| 4535/4850 [20:56<01:27,  3.59it/s]

Epoch: 1, Loss: 2.6765542030334473


Processing epoch 00:  94%|█████████▎| 4536/4850 [20:57<01:27,  3.58it/s]

Epoch: 1, Loss: 2.684624671936035


Processing epoch 00:  94%|█████████▎| 4537/4850 [20:57<01:28,  3.54it/s]

Epoch: 1, Loss: 3.8036818504333496


Processing epoch 00:  94%|█████████▎| 4538/4850 [20:57<01:28,  3.54it/s]

Epoch: 1, Loss: 2.538344144821167


Processing epoch 00:  94%|█████████▎| 4539/4850 [20:58<01:27,  3.57it/s]

Epoch: 1, Loss: 3.2236173152923584


Processing epoch 00:  94%|█████████▎| 4540/4850 [20:58<01:26,  3.58it/s]

Epoch: 1, Loss: 2.8865151405334473


Processing epoch 00:  94%|█████████▎| 4541/4850 [20:58<01:26,  3.58it/s]

Epoch: 1, Loss: 2.9940388202667236


Processing epoch 00:  94%|█████████▎| 4542/4850 [20:58<01:25,  3.58it/s]

Epoch: 1, Loss: 2.5670084953308105


Processing epoch 00:  94%|█████████▎| 4543/4850 [20:59<01:25,  3.58it/s]

Epoch: 1, Loss: 3.0195889472961426


Processing epoch 00:  94%|█████████▎| 4544/4850 [20:59<01:25,  3.59it/s]

Epoch: 1, Loss: 2.900766372680664


Processing epoch 00:  94%|█████████▎| 4545/4850 [20:59<01:25,  3.59it/s]

Epoch: 1, Loss: 2.6325697898864746


Processing epoch 00:  94%|█████████▎| 4546/4850 [20:59<01:25,  3.56it/s]

Epoch: 1, Loss: 3.0233030319213867


Processing epoch 00:  94%|█████████▍| 4547/4850 [21:00<01:25,  3.56it/s]

Epoch: 1, Loss: 2.5651025772094727


Processing epoch 00:  94%|█████████▍| 4548/4850 [21:00<01:25,  3.54it/s]

Epoch: 1, Loss: 3.2407381534576416


Processing epoch 00:  94%|█████████▍| 4549/4850 [21:00<01:24,  3.56it/s]

Epoch: 1, Loss: 2.585529088973999


Processing epoch 00:  94%|█████████▍| 4550/4850 [21:01<01:24,  3.57it/s]

Epoch: 1, Loss: 2.4938440322875977


Processing epoch 00:  94%|█████████▍| 4551/4850 [21:01<01:23,  3.57it/s]

Epoch: 1, Loss: 2.5866596698760986


Processing epoch 00:  94%|█████████▍| 4552/4850 [21:01<01:23,  3.56it/s]

Epoch: 1, Loss: 2.5307466983795166


Processing epoch 00:  94%|█████████▍| 4553/4850 [21:01<01:23,  3.58it/s]

Epoch: 1, Loss: 3.4492573738098145


Processing epoch 00:  94%|█████████▍| 4554/4850 [21:02<01:22,  3.59it/s]

Epoch: 1, Loss: 3.06643009185791


Processing epoch 00:  94%|█████████▍| 4555/4850 [21:02<01:22,  3.59it/s]

Epoch: 1, Loss: 3.029934883117676


Processing epoch 00:  94%|█████████▍| 4556/4850 [21:02<01:21,  3.59it/s]

Epoch: 1, Loss: 3.2099170684814453


Processing epoch 00:  94%|█████████▍| 4557/4850 [21:03<01:21,  3.60it/s]

Epoch: 1, Loss: 2.77921724319458


Processing epoch 00:  94%|█████████▍| 4558/4850 [21:03<01:21,  3.60it/s]

Epoch: 1, Loss: 2.3328652381896973


Processing epoch 00:  94%|█████████▍| 4559/4850 [21:03<01:20,  3.60it/s]

Epoch: 1, Loss: 3.1423888206481934


Processing epoch 00:  94%|█████████▍| 4560/4850 [21:03<01:21,  3.55it/s]

Epoch: 1, Loss: 2.9190163612365723


Processing epoch 00:  94%|█████████▍| 4561/4850 [21:04<01:21,  3.55it/s]

Epoch: 1, Loss: 2.7561445236206055


Processing epoch 00:  94%|█████████▍| 4562/4850 [21:04<01:20,  3.56it/s]

Epoch: 1, Loss: 3.0239686965942383


Processing epoch 00:  94%|█████████▍| 4563/4850 [21:04<01:22,  3.48it/s]

Epoch: 1, Loss: 3.4687399864196777


Processing epoch 00:  94%|█████████▍| 4564/4850 [21:05<01:21,  3.50it/s]

Epoch: 1, Loss: 2.6095588207244873


Processing epoch 00:  94%|█████████▍| 4565/4850 [21:05<01:21,  3.49it/s]

Epoch: 1, Loss: 2.5250799655914307


Processing epoch 00:  94%|█████████▍| 4566/4850 [21:05<01:21,  3.47it/s]

Epoch: 1, Loss: 3.023988962173462


Processing epoch 00:  94%|█████████▍| 4567/4850 [21:05<01:21,  3.48it/s]

Epoch: 1, Loss: 2.864879846572876


Processing epoch 00:  94%|█████████▍| 4568/4850 [21:06<01:22,  3.44it/s]

Epoch: 1, Loss: 2.185732364654541


Processing epoch 00:  94%|█████████▍| 4569/4850 [21:06<01:22,  3.39it/s]

Epoch: 1, Loss: 2.512485980987549


Processing epoch 00:  94%|█████████▍| 4570/4850 [21:06<01:22,  3.39it/s]

Epoch: 1, Loss: 2.569164752960205


Processing epoch 00:  94%|█████████▍| 4571/4850 [21:07<01:22,  3.38it/s]

Epoch: 1, Loss: 2.398455858230591


Processing epoch 00:  94%|█████████▍| 4572/4850 [21:07<01:20,  3.46it/s]

Epoch: 1, Loss: 3.024819850921631


Processing epoch 00:  94%|█████████▍| 4573/4850 [21:07<01:19,  3.48it/s]

Epoch: 1, Loss: 2.397369861602783


Processing epoch 00:  94%|█████████▍| 4574/4850 [21:07<01:19,  3.47it/s]

Epoch: 1, Loss: 2.9429407119750977


Processing epoch 00:  94%|█████████▍| 4575/4850 [21:08<01:18,  3.51it/s]

Epoch: 1, Loss: 3.8910536766052246


Processing epoch 00:  94%|█████████▍| 4576/4850 [21:08<01:17,  3.54it/s]

Epoch: 1, Loss: 2.670600652694702


Processing epoch 00:  94%|█████████▍| 4577/4850 [21:08<01:16,  3.55it/s]

Epoch: 1, Loss: 2.4895858764648438


Processing epoch 00:  94%|█████████▍| 4578/4850 [21:09<01:16,  3.56it/s]

Epoch: 1, Loss: 2.6548075675964355


Processing epoch 00:  94%|█████████▍| 4579/4850 [21:09<01:15,  3.57it/s]

Epoch: 1, Loss: 2.6537768840789795


Processing epoch 00:  94%|█████████▍| 4580/4850 [21:09<01:15,  3.59it/s]

Epoch: 1, Loss: 3.2477893829345703


Processing epoch 00:  94%|█████████▍| 4581/4850 [21:09<01:15,  3.58it/s]

Epoch: 1, Loss: 2.633100986480713


Processing epoch 00:  94%|█████████▍| 4582/4850 [21:10<01:15,  3.55it/s]

Epoch: 1, Loss: 3.667901039123535


Processing epoch 00:  94%|█████████▍| 4583/4850 [21:10<01:15,  3.54it/s]

Epoch: 1, Loss: 2.5322341918945312


Processing epoch 00:  95%|█████████▍| 4584/4850 [21:10<01:14,  3.55it/s]

Epoch: 1, Loss: 2.524933338165283


Processing epoch 00:  95%|█████████▍| 4585/4850 [21:11<01:14,  3.54it/s]

Epoch: 1, Loss: 3.022455930709839


Processing epoch 00:  95%|█████████▍| 4586/4850 [21:11<01:13,  3.57it/s]

Epoch: 1, Loss: 3.904271125793457


Processing epoch 00:  95%|█████████▍| 4587/4850 [21:11<01:13,  3.57it/s]

Epoch: 1, Loss: 3.457730531692505


Processing epoch 00:  95%|█████████▍| 4588/4850 [21:11<01:13,  3.59it/s]

Epoch: 1, Loss: 3.947533130645752


Processing epoch 00:  95%|█████████▍| 4589/4850 [21:12<01:12,  3.59it/s]

Epoch: 1, Loss: 3.053370475769043


Processing epoch 00:  95%|█████████▍| 4590/4850 [21:12<01:12,  3.59it/s]

Epoch: 1, Loss: 2.733736515045166


Processing epoch 00:  95%|█████████▍| 4591/4850 [21:12<01:12,  3.58it/s]

Epoch: 1, Loss: 3.178469181060791


Processing epoch 00:  95%|█████████▍| 4592/4850 [21:12<01:11,  3.60it/s]

Epoch: 1, Loss: 3.8526790142059326


Processing epoch 00:  95%|█████████▍| 4593/4850 [21:13<01:11,  3.59it/s]

Epoch: 1, Loss: 2.8524937629699707


Processing epoch 00:  95%|█████████▍| 4594/4850 [21:13<01:11,  3.60it/s]

Epoch: 1, Loss: 2.9881105422973633


Processing epoch 00:  95%|█████████▍| 4595/4850 [21:13<01:10,  3.61it/s]

Epoch: 1, Loss: 3.934597969055176


Processing epoch 00:  95%|█████████▍| 4596/4850 [21:14<01:10,  3.58it/s]

Epoch: 1, Loss: 3.608933925628662


Processing epoch 00:  95%|█████████▍| 4597/4850 [21:14<01:10,  3.59it/s]

Epoch: 1, Loss: 3.704061508178711


Processing epoch 00:  95%|█████████▍| 4598/4850 [21:14<01:10,  3.60it/s]

Epoch: 1, Loss: 3.518160343170166


Processing epoch 00:  95%|█████████▍| 4599/4850 [21:14<01:09,  3.60it/s]

Epoch: 1, Loss: 3.692966938018799


Processing epoch 00:  95%|█████████▍| 4600/4850 [21:15<01:09,  3.60it/s]

Epoch: 1, Loss: 2.7606124877929688


Processing epoch 00:  95%|█████████▍| 4601/4850 [21:15<01:08,  3.62it/s]

Epoch: 1, Loss: 2.9783549308776855


Processing epoch 00:  95%|█████████▍| 4602/4850 [21:15<01:08,  3.61it/s]

Epoch: 1, Loss: 2.5265421867370605


Processing epoch 00:  95%|█████████▍| 4603/4850 [21:16<01:08,  3.61it/s]

Epoch: 1, Loss: 3.4578022956848145


Processing epoch 00:  95%|█████████▍| 4604/4850 [21:16<01:08,  3.59it/s]

Epoch: 1, Loss: 3.1915745735168457


Processing epoch 00:  95%|█████████▍| 4605/4850 [21:16<01:08,  3.59it/s]

Epoch: 1, Loss: 3.0340723991394043


Processing epoch 00:  95%|█████████▍| 4606/4850 [21:16<01:07,  3.59it/s]

Epoch: 1, Loss: 2.855639934539795


Processing epoch 00:  95%|█████████▍| 4607/4850 [21:17<01:07,  3.59it/s]

Epoch: 1, Loss: 2.6583409309387207


Processing epoch 00:  95%|█████████▌| 4608/4850 [21:17<01:08,  3.52it/s]

Epoch: 1, Loss: 2.5121653079986572


Processing epoch 00:  95%|█████████▌| 4609/4850 [21:17<01:10,  3.41it/s]

Epoch: 1, Loss: 2.6757190227508545


Processing epoch 00:  95%|█████████▌| 4610/4850 [21:18<01:09,  3.43it/s]

Epoch: 1, Loss: 2.7448654174804688


Processing epoch 00:  95%|█████████▌| 4611/4850 [21:18<01:09,  3.45it/s]

Epoch: 1, Loss: 2.5730888843536377


Processing epoch 00:  95%|█████████▌| 4612/4850 [21:18<01:08,  3.48it/s]

Epoch: 1, Loss: 3.170651912689209


Processing epoch 00:  95%|█████████▌| 4613/4850 [21:18<01:08,  3.47it/s]

Epoch: 1, Loss: 3.6457395553588867


Processing epoch 00:  95%|█████████▌| 4614/4850 [21:19<01:07,  3.48it/s]

Epoch: 1, Loss: 2.8218960762023926


Processing epoch 00:  95%|█████████▌| 4615/4850 [21:19<01:09,  3.36it/s]

Epoch: 1, Loss: 2.840574264526367


Processing epoch 00:  95%|█████████▌| 4616/4850 [21:19<01:10,  3.34it/s]

Epoch: 1, Loss: 3.9344592094421387


Processing epoch 00:  95%|█████████▌| 4617/4850 [21:20<01:09,  3.34it/s]

Epoch: 1, Loss: 2.5239906311035156


Processing epoch 00:  95%|█████████▌| 4618/4850 [21:20<01:09,  3.32it/s]

Epoch: 1, Loss: 3.1991896629333496


Processing epoch 00:  95%|█████████▌| 4619/4850 [21:20<01:08,  3.36it/s]

Epoch: 1, Loss: 2.9972481727600098


Processing epoch 00:  95%|█████████▌| 4620/4850 [21:21<01:09,  3.31it/s]

Epoch: 1, Loss: 4.023730754852295


Processing epoch 00:  95%|█████████▌| 4621/4850 [21:21<01:07,  3.39it/s]

Epoch: 1, Loss: 3.524624824523926


Processing epoch 00:  95%|█████████▌| 4622/4850 [21:21<01:05,  3.46it/s]

Epoch: 1, Loss: 3.134258270263672


Processing epoch 00:  95%|█████████▌| 4623/4850 [21:21<01:05,  3.49it/s]

Epoch: 1, Loss: 2.6543679237365723


Processing epoch 00:  95%|█████████▌| 4624/4850 [21:22<01:04,  3.51it/s]

Epoch: 1, Loss: 2.8887689113616943


Processing epoch 00:  95%|█████████▌| 4625/4850 [21:22<01:04,  3.51it/s]

Epoch: 1, Loss: 4.4435648918151855


Processing epoch 00:  95%|█████████▌| 4626/4850 [21:22<01:03,  3.51it/s]

Epoch: 1, Loss: 2.851025104522705


Processing epoch 00:  95%|█████████▌| 4627/4850 [21:22<01:03,  3.53it/s]

Epoch: 1, Loss: 3.178372859954834


Processing epoch 00:  95%|█████████▌| 4628/4850 [21:23<01:02,  3.53it/s]

Epoch: 1, Loss: 2.9286913871765137


Processing epoch 00:  95%|█████████▌| 4629/4850 [21:23<01:02,  3.52it/s]

Epoch: 1, Loss: 2.845028877258301


Processing epoch 00:  95%|█████████▌| 4630/4850 [21:23<01:02,  3.54it/s]

Epoch: 1, Loss: 2.928730010986328


Processing epoch 00:  95%|█████████▌| 4631/4850 [21:24<01:01,  3.57it/s]

Epoch: 1, Loss: 2.2354319095611572


Processing epoch 00:  96%|█████████▌| 4632/4850 [21:24<01:01,  3.57it/s]

Epoch: 1, Loss: 3.016690731048584


Processing epoch 00:  96%|█████████▌| 4633/4850 [21:24<01:00,  3.57it/s]

Epoch: 1, Loss: 3.0311732292175293


Processing epoch 00:  96%|█████████▌| 4634/4850 [21:24<01:00,  3.58it/s]

Epoch: 1, Loss: 2.764741897583008


Processing epoch 00:  96%|█████████▌| 4635/4850 [21:25<01:00,  3.58it/s]

Epoch: 1, Loss: 2.845240592956543


Processing epoch 00:  96%|█████████▌| 4636/4850 [21:25<00:59,  3.57it/s]

Epoch: 1, Loss: 2.704777479171753


Processing epoch 00:  96%|█████████▌| 4637/4850 [21:25<00:59,  3.55it/s]

Epoch: 1, Loss: 2.897991180419922


Processing epoch 00:  96%|█████████▌| 4638/4850 [21:26<00:59,  3.56it/s]

Epoch: 1, Loss: 2.6989293098449707


Processing epoch 00:  96%|█████████▌| 4639/4850 [21:26<00:59,  3.57it/s]

Epoch: 1, Loss: 3.327502727508545


Processing epoch 00:  96%|█████████▌| 4640/4850 [21:26<00:58,  3.58it/s]

Epoch: 1, Loss: 2.944291353225708


Processing epoch 00:  96%|█████████▌| 4641/4850 [21:26<00:58,  3.59it/s]

Epoch: 1, Loss: 2.4080615043640137


Processing epoch 00:  96%|█████████▌| 4642/4850 [21:27<00:57,  3.60it/s]

Epoch: 1, Loss: 2.960455894470215


Processing epoch 00:  96%|█████████▌| 4643/4850 [21:27<00:57,  3.62it/s]

Epoch: 1, Loss: 3.6336112022399902


Processing epoch 00:  96%|█████████▌| 4644/4850 [21:27<00:57,  3.61it/s]

Epoch: 1, Loss: 2.7942049503326416


Processing epoch 00:  96%|█████████▌| 4645/4850 [21:28<00:56,  3.61it/s]

Epoch: 1, Loss: 2.649538516998291


Processing epoch 00:  96%|█████████▌| 4646/4850 [21:28<00:56,  3.59it/s]

Epoch: 1, Loss: 2.7471184730529785


Processing epoch 00:  96%|█████████▌| 4647/4850 [21:28<00:57,  3.56it/s]

Epoch: 1, Loss: 4.4899139404296875


Processing epoch 00:  96%|█████████▌| 4648/4850 [21:28<00:56,  3.56it/s]

Epoch: 1, Loss: 2.792952537536621


Processing epoch 00:  96%|█████████▌| 4649/4850 [21:29<00:56,  3.58it/s]

Epoch: 1, Loss: 2.821040630340576


Processing epoch 00:  96%|█████████▌| 4650/4850 [21:29<00:55,  3.59it/s]

Epoch: 1, Loss: 3.3403139114379883


Processing epoch 00:  96%|█████████▌| 4651/4850 [21:29<00:55,  3.59it/s]

Epoch: 1, Loss: 2.8586225509643555


Processing epoch 00:  96%|█████████▌| 4652/4850 [21:29<00:54,  3.61it/s]

Epoch: 1, Loss: 2.745884895324707


Processing epoch 00:  96%|█████████▌| 4653/4850 [21:30<00:54,  3.61it/s]

Epoch: 1, Loss: 2.8350205421447754


Processing epoch 00:  96%|█████████▌| 4654/4850 [21:30<00:54,  3.59it/s]

Epoch: 1, Loss: 2.7110657691955566


Processing epoch 00:  96%|█████████▌| 4655/4850 [21:30<00:54,  3.59it/s]

Epoch: 1, Loss: 3.0335311889648438


Processing epoch 00:  96%|█████████▌| 4656/4850 [21:31<00:54,  3.55it/s]

Epoch: 1, Loss: 3.017789602279663


Processing epoch 00:  96%|█████████▌| 4657/4850 [21:31<00:55,  3.47it/s]

Epoch: 1, Loss: 2.529545307159424


Processing epoch 00:  96%|█████████▌| 4658/4850 [21:31<00:56,  3.43it/s]

Epoch: 1, Loss: 2.7776989936828613


Processing epoch 00:  96%|█████████▌| 4659/4850 [21:31<00:55,  3.45it/s]

Epoch: 1, Loss: 2.8669581413269043


Processing epoch 00:  96%|█████████▌| 4660/4850 [21:32<00:54,  3.47it/s]

Epoch: 1, Loss: 2.867206573486328


Processing epoch 00:  96%|█████████▌| 4661/4850 [21:32<00:54,  3.48it/s]

Epoch: 1, Loss: 2.624511957168579


Processing epoch 00:  96%|█████████▌| 4662/4850 [21:32<00:54,  3.46it/s]

Epoch: 1, Loss: 3.056793689727783


Processing epoch 00:  96%|█████████▌| 4663/4850 [21:33<00:54,  3.43it/s]

Epoch: 1, Loss: 2.7054195404052734


Processing epoch 00:  96%|█████████▌| 4664/4850 [21:33<00:54,  3.43it/s]

Epoch: 1, Loss: 3.1438684463500977


Processing epoch 00:  96%|█████████▌| 4665/4850 [21:33<00:53,  3.46it/s]

Epoch: 1, Loss: 3.002387046813965


Processing epoch 00:  96%|█████████▌| 4666/4850 [21:34<00:53,  3.47it/s]

Epoch: 1, Loss: 2.4677648544311523


Processing epoch 00:  96%|█████████▌| 4667/4850 [21:34<00:53,  3.45it/s]

Epoch: 1, Loss: 3.511950969696045


Processing epoch 00:  96%|█████████▌| 4668/4850 [21:34<00:52,  3.47it/s]

Epoch: 1, Loss: 3.380303382873535


Processing epoch 00:  96%|█████████▋| 4669/4850 [21:34<00:52,  3.44it/s]

Epoch: 1, Loss: 3.2310144901275635


Processing epoch 00:  96%|█████████▋| 4670/4850 [21:35<00:51,  3.48it/s]

Epoch: 1, Loss: 3.128208637237549


Processing epoch 00:  96%|█████████▋| 4671/4850 [21:35<00:50,  3.52it/s]

Epoch: 1, Loss: 4.173954486846924


Processing epoch 00:  96%|█████████▋| 4672/4850 [21:35<00:50,  3.53it/s]

Epoch: 1, Loss: 3.1185741424560547


Processing epoch 00:  96%|█████████▋| 4673/4850 [21:35<00:50,  3.52it/s]

Epoch: 1, Loss: 2.520562171936035


Processing epoch 00:  96%|█████████▋| 4674/4850 [21:36<00:49,  3.54it/s]

Epoch: 1, Loss: 2.4763503074645996


Processing epoch 00:  96%|█████████▋| 4675/4850 [21:36<00:49,  3.55it/s]

Epoch: 1, Loss: 3.0066170692443848


Processing epoch 00:  96%|█████████▋| 4676/4850 [21:36<00:48,  3.57it/s]

Epoch: 1, Loss: 2.9175901412963867


Processing epoch 00:  96%|█████████▋| 4677/4850 [21:37<00:48,  3.58it/s]

Epoch: 1, Loss: 3.0391411781311035


Processing epoch 00:  96%|█████████▋| 4678/4850 [21:37<00:47,  3.59it/s]

Epoch: 1, Loss: 3.128592014312744


Processing epoch 00:  96%|█████████▋| 4679/4850 [21:37<00:47,  3.59it/s]

Epoch: 1, Loss: 2.505093574523926


Processing epoch 00:  96%|█████████▋| 4680/4850 [21:37<00:47,  3.59it/s]

Epoch: 1, Loss: 2.8283677101135254


Processing epoch 00:  97%|█████████▋| 4681/4850 [21:38<00:46,  3.61it/s]

Epoch: 1, Loss: 3.458902597427368


Processing epoch 00:  97%|█████████▋| 4682/4850 [21:38<00:46,  3.62it/s]

Epoch: 1, Loss: 3.8050131797790527


Processing epoch 00:  97%|█████████▋| 4683/4850 [21:38<00:46,  3.63it/s]

Epoch: 1, Loss: 2.9340829849243164


Processing epoch 00:  97%|█████████▋| 4684/4850 [21:39<00:46,  3.61it/s]

Epoch: 1, Loss: 2.6332616806030273


Processing epoch 00:  97%|█████████▋| 4685/4850 [21:39<00:45,  3.60it/s]

Epoch: 1, Loss: 2.921480178833008


Processing epoch 00:  97%|█████████▋| 4686/4850 [21:39<00:45,  3.59it/s]

Epoch: 1, Loss: 2.8471062183380127


Processing epoch 00:  97%|█████████▋| 4687/4850 [21:39<00:45,  3.59it/s]

Epoch: 1, Loss: 2.5376124382019043


Processing epoch 00:  97%|█████████▋| 4688/4850 [21:40<00:44,  3.61it/s]

Epoch: 1, Loss: 2.9713759422302246


Processing epoch 00:  97%|█████████▋| 4689/4850 [21:40<00:44,  3.60it/s]

Epoch: 1, Loss: 2.567221164703369


Processing epoch 00:  97%|█████████▋| 4690/4850 [21:40<00:44,  3.58it/s]

Epoch: 1, Loss: 3.064211368560791


Processing epoch 00:  97%|█████████▋| 4691/4850 [21:41<00:44,  3.57it/s]

Epoch: 1, Loss: 2.9887919425964355


Processing epoch 00:  97%|█████████▋| 4692/4850 [21:41<00:44,  3.59it/s]

Epoch: 1, Loss: 3.4027938842773438


Processing epoch 00:  97%|█████████▋| 4693/4850 [21:41<00:43,  3.58it/s]

Epoch: 1, Loss: 3.0031943321228027


Processing epoch 00:  97%|█████████▋| 4694/4850 [21:41<00:43,  3.60it/s]

Epoch: 1, Loss: 2.5960898399353027


Processing epoch 00:  97%|█████████▋| 4695/4850 [21:42<00:43,  3.60it/s]

Epoch: 1, Loss: 3.186123847961426


Processing epoch 00:  97%|█████████▋| 4696/4850 [21:42<00:42,  3.61it/s]

Epoch: 1, Loss: 2.8023269176483154


Processing epoch 00:  97%|█████████▋| 4697/4850 [21:42<00:42,  3.60it/s]

Epoch: 1, Loss: 2.802536964416504


Processing epoch 00:  97%|█████████▋| 4698/4850 [21:42<00:42,  3.60it/s]

Epoch: 1, Loss: 3.0936403274536133


Processing epoch 00:  97%|█████████▋| 4699/4850 [21:43<00:41,  3.60it/s]

Epoch: 1, Loss: 2.6471285820007324


Processing epoch 00:  97%|█████████▋| 4700/4850 [21:43<00:41,  3.60it/s]

Epoch: 1, Loss: 2.8059563636779785


Processing epoch 00:  97%|█████████▋| 4701/4850 [21:43<00:41,  3.62it/s]

Epoch: 1, Loss: 2.552335739135742


Processing epoch 00:  97%|█████████▋| 4702/4850 [21:44<00:40,  3.62it/s]

Epoch: 1, Loss: 3.026693820953369


Processing epoch 00:  97%|█████████▋| 4703/4850 [21:44<00:40,  3.61it/s]

Epoch: 1, Loss: 3.5507125854492188


Processing epoch 00:  97%|█████████▋| 4704/4850 [21:44<00:40,  3.62it/s]

Epoch: 1, Loss: 3.86008358001709


Processing epoch 00:  97%|█████████▋| 4705/4850 [21:44<00:40,  3.62it/s]

Epoch: 1, Loss: 3.22249698638916


Processing epoch 00:  97%|█████████▋| 4706/4850 [21:45<00:40,  3.58it/s]

Epoch: 1, Loss: 3.8787379264831543


Processing epoch 00:  97%|█████████▋| 4707/4850 [21:45<00:40,  3.51it/s]

Epoch: 1, Loss: 2.76033353805542


Processing epoch 00:  97%|█████████▋| 4708/4850 [21:45<00:40,  3.49it/s]

Epoch: 1, Loss: 2.9082305431365967


Processing epoch 00:  97%|█████████▋| 4709/4850 [21:46<00:40,  3.47it/s]

Epoch: 1, Loss: 2.627488374710083


Processing epoch 00:  97%|█████████▋| 4710/4850 [21:46<00:40,  3.45it/s]

Epoch: 1, Loss: 3.029849052429199


Processing epoch 00:  97%|█████████▋| 4711/4850 [21:46<00:40,  3.43it/s]

Epoch: 1, Loss: 2.7875123023986816


Processing epoch 00:  97%|█████████▋| 4712/4850 [21:46<00:40,  3.39it/s]

Epoch: 1, Loss: 2.8348941802978516


Processing epoch 00:  97%|█████████▋| 4713/4850 [21:47<00:41,  3.31it/s]

Epoch: 1, Loss: 2.891756057739258


Processing epoch 00:  97%|█████████▋| 4714/4850 [21:47<00:41,  3.28it/s]

Epoch: 1, Loss: 3.7321577072143555


Processing epoch 00:  97%|█████████▋| 4715/4850 [21:47<00:40,  3.30it/s]

Epoch: 1, Loss: 3.0137672424316406


Processing epoch 00:  97%|█████████▋| 4716/4850 [21:48<00:40,  3.29it/s]

Epoch: 1, Loss: 3.529022455215454


Processing epoch 00:  97%|█████████▋| 4717/4850 [21:48<00:39,  3.38it/s]

Epoch: 1, Loss: 2.877822160720825


Processing epoch 00:  97%|█████████▋| 4718/4850 [21:48<00:38,  3.44it/s]

Epoch: 1, Loss: 2.676016330718994


Processing epoch 00:  97%|█████████▋| 4719/4850 [21:49<00:37,  3.48it/s]

Epoch: 1, Loss: 2.7000045776367188


Processing epoch 00:  97%|█████████▋| 4720/4850 [21:49<00:36,  3.51it/s]

Epoch: 1, Loss: 2.8301544189453125


Processing epoch 00:  97%|█████████▋| 4721/4850 [21:49<00:36,  3.50it/s]

Epoch: 1, Loss: 2.349000930786133


Processing epoch 00:  97%|█████████▋| 4722/4850 [21:49<00:36,  3.54it/s]

Epoch: 1, Loss: 3.309854030609131


Processing epoch 00:  97%|█████████▋| 4723/4850 [21:50<00:35,  3.57it/s]

Epoch: 1, Loss: 3.108543872833252


Processing epoch 00:  97%|█████████▋| 4724/4850 [21:50<00:35,  3.55it/s]

Epoch: 1, Loss: 2.7406704425811768


Processing epoch 00:  97%|█████████▋| 4725/4850 [21:50<00:35,  3.56it/s]

Epoch: 1, Loss: 2.8036608695983887


Processing epoch 00:  97%|█████████▋| 4726/4850 [21:50<00:34,  3.57it/s]

Epoch: 1, Loss: 3.093925952911377


Processing epoch 00:  97%|█████████▋| 4727/4850 [21:51<00:34,  3.58it/s]

Epoch: 1, Loss: 2.6840286254882812


Processing epoch 00:  97%|█████████▋| 4728/4850 [21:51<00:33,  3.60it/s]

Epoch: 1, Loss: 2.629072666168213


Processing epoch 00:  98%|█████████▊| 4729/4850 [21:51<00:33,  3.60it/s]

Epoch: 1, Loss: 3.8487143516540527


Processing epoch 00:  98%|█████████▊| 4730/4850 [21:52<00:33,  3.57it/s]

Epoch: 1, Loss: 2.3968112468719482


Processing epoch 00:  98%|█████████▊| 4731/4850 [21:52<00:33,  3.59it/s]

Epoch: 1, Loss: 3.479400634765625


Processing epoch 00:  98%|█████████▊| 4732/4850 [21:52<00:32,  3.58it/s]

Epoch: 1, Loss: 3.3558225631713867


Processing epoch 00:  98%|█████████▊| 4733/4850 [21:52<00:32,  3.59it/s]

Epoch: 1, Loss: 3.355398178100586


Processing epoch 00:  98%|█████████▊| 4734/4850 [21:53<00:32,  3.59it/s]

Epoch: 1, Loss: 2.598142623901367


Processing epoch 00:  98%|█████████▊| 4735/4850 [21:53<00:31,  3.60it/s]

Epoch: 1, Loss: 3.096503257751465


Processing epoch 00:  98%|█████████▊| 4736/4850 [21:53<00:31,  3.60it/s]

Epoch: 1, Loss: 2.3000752925872803


Processing epoch 00:  98%|█████████▊| 4737/4850 [21:54<00:31,  3.59it/s]

Epoch: 1, Loss: 3.855278491973877


Processing epoch 00:  98%|█████████▊| 4738/4850 [21:54<00:31,  3.60it/s]

Epoch: 1, Loss: 3.3547933101654053


Processing epoch 00:  98%|█████████▊| 4739/4850 [21:54<00:30,  3.60it/s]

Epoch: 1, Loss: 2.9498322010040283


Processing epoch 00:  98%|█████████▊| 4740/4850 [21:54<00:30,  3.58it/s]

Epoch: 1, Loss: 3.005671501159668


Processing epoch 00:  98%|█████████▊| 4741/4850 [21:55<00:30,  3.60it/s]

Epoch: 1, Loss: 2.946495532989502


Processing epoch 00:  98%|█████████▊| 4742/4850 [21:55<00:29,  3.60it/s]

Epoch: 1, Loss: 2.7157320976257324


Processing epoch 00:  98%|█████████▊| 4743/4850 [21:55<00:29,  3.61it/s]

Epoch: 1, Loss: 2.7934951782226562


Processing epoch 00:  98%|█████████▊| 4744/4850 [21:55<00:29,  3.62it/s]

Epoch: 1, Loss: 2.9463443756103516


Processing epoch 00:  98%|█████████▊| 4745/4850 [21:56<00:29,  3.60it/s]

Epoch: 1, Loss: 3.0272603034973145


Processing epoch 00:  98%|█████████▊| 4746/4850 [21:56<00:28,  3.59it/s]

Epoch: 1, Loss: 2.1066107749938965


Processing epoch 00:  98%|█████████▊| 4747/4850 [21:56<00:28,  3.58it/s]

Epoch: 1, Loss: 3.369804859161377


Processing epoch 00:  98%|█████████▊| 4748/4850 [21:57<00:28,  3.58it/s]

Epoch: 1, Loss: 2.9985013008117676


Processing epoch 00:  98%|█████████▊| 4749/4850 [21:57<00:28,  3.59it/s]

Epoch: 1, Loss: 3.4323668479919434


Processing epoch 00:  98%|█████████▊| 4750/4850 [21:57<00:28,  3.56it/s]

Epoch: 1, Loss: 2.570143461227417


Processing epoch 00:  98%|█████████▊| 4751/4850 [21:57<00:27,  3.57it/s]

Epoch: 1, Loss: 3.551607608795166


Processing epoch 00:  98%|█████████▊| 4752/4850 [21:58<00:27,  3.58it/s]

Epoch: 1, Loss: 4.033817768096924


Processing epoch 00:  98%|█████████▊| 4753/4850 [21:58<00:27,  3.51it/s]

Epoch: 1, Loss: 2.6140010356903076


Processing epoch 00:  98%|█████████▊| 4754/4850 [21:58<00:27,  3.49it/s]

Epoch: 1, Loss: 2.7491393089294434


Processing epoch 00:  98%|█████████▊| 4755/4850 [21:59<00:27,  3.52it/s]

Epoch: 1, Loss: 3.736844539642334


Processing epoch 00:  98%|█████████▊| 4756/4850 [21:59<00:26,  3.53it/s]

Epoch: 1, Loss: 2.902031421661377


Processing epoch 00:  98%|█████████▊| 4757/4850 [21:59<00:26,  3.53it/s]

Epoch: 1, Loss: 2.6132006645202637


Processing epoch 00:  98%|█████████▊| 4758/4850 [21:59<00:26,  3.54it/s]

Epoch: 1, Loss: 2.6354756355285645


Processing epoch 00:  98%|█████████▊| 4759/4850 [22:00<00:25,  3.53it/s]

Epoch: 1, Loss: 2.664307117462158


Processing epoch 00:  98%|█████████▊| 4760/4850 [22:00<00:25,  3.47it/s]

Epoch: 1, Loss: 2.5799732208251953


Processing epoch 00:  98%|█████████▊| 4761/4850 [22:00<00:26,  3.38it/s]

Epoch: 1, Loss: 2.436677932739258


Processing epoch 00:  98%|█████████▊| 4762/4850 [22:01<00:25,  3.40it/s]

Epoch: 1, Loss: 3.0082435607910156


Processing epoch 00:  98%|█████████▊| 4763/4850 [22:01<00:25,  3.43it/s]

Epoch: 1, Loss: 2.8050618171691895


Processing epoch 00:  98%|█████████▊| 4764/4850 [22:01<00:24,  3.44it/s]

Epoch: 1, Loss: 3.13372802734375


Processing epoch 00:  98%|█████████▊| 4765/4850 [22:01<00:24,  3.40it/s]

Epoch: 1, Loss: 3.00026273727417


Processing epoch 00:  98%|█████████▊| 4766/4850 [22:02<00:24,  3.46it/s]

Epoch: 1, Loss: 2.6187243461608887


Processing epoch 00:  98%|█████████▊| 4767/4850 [22:02<00:23,  3.47it/s]

Epoch: 1, Loss: 3.280625343322754


Processing epoch 00:  98%|█████████▊| 4768/4850 [22:02<00:23,  3.50it/s]

Epoch: 1, Loss: 3.084850788116455


Processing epoch 00:  98%|█████████▊| 4769/4850 [22:03<00:22,  3.53it/s]

Epoch: 1, Loss: 3.543466091156006


Processing epoch 00:  98%|█████████▊| 4770/4850 [22:03<00:22,  3.55it/s]

Epoch: 1, Loss: 3.1107733249664307


Processing epoch 00:  98%|█████████▊| 4771/4850 [22:03<00:22,  3.55it/s]

Epoch: 1, Loss: 2.934542179107666


Processing epoch 00:  98%|█████████▊| 4772/4850 [22:03<00:22,  3.53it/s]

Epoch: 1, Loss: 3.212092399597168


Processing epoch 00:  98%|█████████▊| 4773/4850 [22:04<00:21,  3.55it/s]

Epoch: 1, Loss: 2.7798831462860107


Processing epoch 00:  98%|█████████▊| 4774/4850 [22:04<00:21,  3.54it/s]

Epoch: 1, Loss: 3.3251829147338867


Processing epoch 00:  98%|█████████▊| 4775/4850 [22:04<00:21,  3.53it/s]

Epoch: 1, Loss: 2.7568440437316895


Processing epoch 00:  98%|█████████▊| 4776/4850 [22:05<00:20,  3.54it/s]

Epoch: 1, Loss: 2.413200855255127


Processing epoch 00:  98%|█████████▊| 4777/4850 [22:05<00:20,  3.57it/s]

Epoch: 1, Loss: 2.8980813026428223


Processing epoch 00:  99%|█████████▊| 4778/4850 [22:05<00:20,  3.58it/s]

Epoch: 1, Loss: 2.894818067550659


Processing epoch 00:  99%|█████████▊| 4779/4850 [22:05<00:19,  3.58it/s]

Epoch: 1, Loss: 2.4429502487182617


Processing epoch 00:  99%|█████████▊| 4780/4850 [22:06<00:19,  3.59it/s]

Epoch: 1, Loss: 2.892580509185791


Processing epoch 00:  99%|█████████▊| 4781/4850 [22:06<00:19,  3.59it/s]

Epoch: 1, Loss: 2.6642212867736816


Processing epoch 00:  99%|█████████▊| 4782/4850 [22:06<00:18,  3.58it/s]

Epoch: 1, Loss: 2.343252420425415


Processing epoch 00:  99%|█████████▊| 4783/4850 [22:07<00:18,  3.58it/s]

Epoch: 1, Loss: 3.108156681060791


Processing epoch 00:  99%|█████████▊| 4784/4850 [22:07<00:18,  3.58it/s]

Epoch: 1, Loss: 3.028062343597412


Processing epoch 00:  99%|█████████▊| 4785/4850 [22:07<00:18,  3.57it/s]

Epoch: 1, Loss: 2.6638870239257812


Processing epoch 00:  99%|█████████▊| 4786/4850 [22:07<00:17,  3.56it/s]

Epoch: 1, Loss: 3.460787773132324


Processing epoch 00:  99%|█████████▊| 4787/4850 [22:08<00:17,  3.56it/s]

Epoch: 1, Loss: 2.8195862770080566


Processing epoch 00:  99%|█████████▊| 4788/4850 [22:08<00:17,  3.55it/s]

Epoch: 1, Loss: 3.2435009479522705


Processing epoch 00:  99%|█████████▊| 4789/4850 [22:08<00:17,  3.57it/s]

Epoch: 1, Loss: 3.0866193771362305


Processing epoch 00:  99%|█████████▉| 4790/4850 [22:08<00:16,  3.55it/s]

Epoch: 1, Loss: 3.455418825149536


Processing epoch 00:  99%|█████████▉| 4791/4850 [22:09<00:16,  3.56it/s]

Epoch: 1, Loss: 2.938076972961426


Processing epoch 00:  99%|█████████▉| 4792/4850 [22:09<00:16,  3.56it/s]

Epoch: 1, Loss: 2.7132225036621094


Processing epoch 00:  99%|█████████▉| 4793/4850 [22:09<00:15,  3.57it/s]

Epoch: 1, Loss: 2.7432446479797363


Processing epoch 00:  99%|█████████▉| 4794/4850 [22:10<00:15,  3.58it/s]

Epoch: 1, Loss: 3.137070894241333


Processing epoch 00:  99%|█████████▉| 4795/4850 [22:10<00:15,  3.58it/s]

Epoch: 1, Loss: 2.97312593460083


Processing epoch 00:  99%|█████████▉| 4796/4850 [22:10<00:15,  3.58it/s]

Epoch: 1, Loss: 2.6634039878845215


Processing epoch 00:  99%|█████████▉| 4797/4850 [22:10<00:14,  3.57it/s]

Epoch: 1, Loss: 2.884906530380249


Processing epoch 00:  99%|█████████▉| 4798/4850 [22:11<00:14,  3.56it/s]

Epoch: 1, Loss: 3.036099672317505


Processing epoch 00:  99%|█████████▉| 4799/4850 [22:11<00:14,  3.59it/s]

Epoch: 1, Loss: 3.1908867359161377


Processing epoch 00:  99%|█████████▉| 4800/4850 [22:11<00:13,  3.59it/s]

Epoch: 1, Loss: 2.963312864303589


Processing epoch 00:  99%|█████████▉| 4801/4850 [22:12<00:13,  3.57it/s]

Epoch: 1, Loss: 2.9242520332336426


Processing epoch 00:  99%|█████████▉| 4802/4850 [22:12<00:13,  3.50it/s]

Epoch: 1, Loss: 3.160632371902466


Processing epoch 00:  99%|█████████▉| 4803/4850 [22:12<00:13,  3.44it/s]

Epoch: 1, Loss: 2.579256534576416


Processing epoch 00:  99%|█████████▉| 4804/4850 [22:12<00:13,  3.42it/s]

Epoch: 1, Loss: 3.2813827991485596


Processing epoch 00:  99%|█████████▉| 4805/4850 [22:13<00:13,  3.45it/s]

Epoch: 1, Loss: 3.181037425994873


Processing epoch 00:  99%|█████████▉| 4806/4850 [22:13<00:12,  3.45it/s]

Epoch: 1, Loss: 2.958252429962158


Processing epoch 00:  99%|█████████▉| 4807/4850 [22:13<00:12,  3.48it/s]

Epoch: 1, Loss: 2.589184284210205


Processing epoch 00:  99%|█████████▉| 4808/4850 [22:14<00:12,  3.44it/s]

Epoch: 1, Loss: 3.493227005004883


Processing epoch 00:  99%|█████████▉| 4809/4850 [22:14<00:11,  3.42it/s]

Epoch: 1, Loss: 2.5070114135742188


Processing epoch 00:  99%|█████████▉| 4810/4850 [22:14<00:11,  3.42it/s]

Epoch: 1, Loss: 2.188783884048462


Processing epoch 00:  99%|█████████▉| 4811/4850 [22:15<00:11,  3.41it/s]

Epoch: 1, Loss: 2.67472505569458


Processing epoch 00:  99%|█████████▉| 4812/4850 [22:15<00:11,  3.40it/s]

Epoch: 1, Loss: 2.6706061363220215


Processing epoch 00:  99%|█████████▉| 4813/4850 [22:15<00:10,  3.36it/s]

Epoch: 1, Loss: 2.8920791149139404


Processing epoch 00:  99%|█████████▉| 4814/4850 [22:15<00:10,  3.35it/s]

Epoch: 1, Loss: 2.9546585083007812


Processing epoch 00:  99%|█████████▉| 4815/4850 [22:16<00:10,  3.41it/s]

Epoch: 1, Loss: 2.8778958320617676


Processing epoch 00:  99%|█████████▉| 4816/4850 [22:16<00:09,  3.48it/s]

Epoch: 1, Loss: 3.7613282203674316


Processing epoch 00:  99%|█████████▉| 4817/4850 [22:16<00:09,  3.51it/s]

Epoch: 1, Loss: 2.825723171234131


Processing epoch 00:  99%|█████████▉| 4818/4850 [22:17<00:09,  3.53it/s]

Epoch: 1, Loss: 2.469616174697876


Processing epoch 00:  99%|█████████▉| 4819/4850 [22:17<00:08,  3.55it/s]

Epoch: 1, Loss: 2.6640024185180664


Processing epoch 00:  99%|█████████▉| 4820/4850 [22:17<00:08,  3.56it/s]

Epoch: 1, Loss: 2.384380340576172


Processing epoch 00:  99%|█████████▉| 4821/4850 [22:17<00:08,  3.58it/s]

Epoch: 1, Loss: 2.787506103515625


Processing epoch 00:  99%|█████████▉| 4822/4850 [22:18<00:07,  3.58it/s]

Epoch: 1, Loss: 3.9324846267700195


Processing epoch 00:  99%|█████████▉| 4823/4850 [22:18<00:07,  3.53it/s]

Epoch: 1, Loss: 2.6503100395202637


Processing epoch 00:  99%|█████████▉| 4824/4850 [22:18<00:07,  3.55it/s]

Epoch: 1, Loss: 2.6389780044555664


Processing epoch 00:  99%|█████████▉| 4825/4850 [22:18<00:07,  3.55it/s]

Epoch: 1, Loss: 2.57442569732666


Processing epoch 00: 100%|█████████▉| 4826/4850 [22:19<00:06,  3.55it/s]

Epoch: 1, Loss: 2.6366477012634277


Processing epoch 00: 100%|█████████▉| 4827/4850 [22:19<00:06,  3.54it/s]

Epoch: 1, Loss: 2.7278952598571777


Processing epoch 00: 100%|█████████▉| 4828/4850 [22:19<00:06,  3.57it/s]

Epoch: 1, Loss: 3.1021361351013184


Processing epoch 00: 100%|█████████▉| 4829/4850 [22:20<00:05,  3.58it/s]

Epoch: 1, Loss: 3.066648483276367


Processing epoch 00: 100%|█████████▉| 4830/4850 [22:20<00:05,  3.57it/s]

Epoch: 1, Loss: 2.986827850341797


Processing epoch 00: 100%|█████████▉| 4831/4850 [22:20<00:05,  3.57it/s]

Epoch: 1, Loss: 3.691551446914673


Processing epoch 00: 100%|█████████▉| 4832/4850 [22:20<00:05,  3.57it/s]

Epoch: 1, Loss: 3.6924843788146973


Processing epoch 00: 100%|█████████▉| 4833/4850 [22:21<00:04,  3.59it/s]

Epoch: 1, Loss: 3.3028435707092285


Processing epoch 00: 100%|█████████▉| 4834/4850 [22:21<00:04,  3.55it/s]

Epoch: 1, Loss: 3.195995330810547


Processing epoch 00: 100%|█████████▉| 4835/4850 [22:21<00:04,  3.57it/s]

Epoch: 1, Loss: 3.1804909706115723


Processing epoch 00: 100%|█████████▉| 4836/4850 [22:22<00:03,  3.56it/s]

Epoch: 1, Loss: 2.819824695587158


Processing epoch 00: 100%|█████████▉| 4837/4850 [22:22<00:03,  3.56it/s]

Epoch: 1, Loss: 2.845019817352295


Processing epoch 00: 100%|█████████▉| 4838/4850 [22:22<00:03,  3.58it/s]

Epoch: 1, Loss: 2.960010051727295


Processing epoch 00: 100%|█████████▉| 4839/4850 [22:22<00:03,  3.57it/s]

Epoch: 1, Loss: 2.6915082931518555


Processing epoch 00: 100%|█████████▉| 4840/4850 [22:23<00:02,  3.59it/s]

Epoch: 1, Loss: 3.5991721153259277


Processing epoch 00: 100%|█████████▉| 4841/4850 [22:23<00:02,  3.60it/s]

Epoch: 1, Loss: 2.853780746459961


Processing epoch 00: 100%|█████████▉| 4842/4850 [22:23<00:02,  3.59it/s]

Epoch: 1, Loss: 2.6534810066223145


Processing epoch 00: 100%|█████████▉| 4843/4850 [22:24<00:01,  3.60it/s]

Epoch: 1, Loss: 2.9217357635498047


Processing epoch 00: 100%|█████████▉| 4844/4850 [22:24<00:01,  3.61it/s]

Epoch: 1, Loss: 3.611177921295166


Processing epoch 00: 100%|█████████▉| 4845/4850 [22:24<00:01,  3.60it/s]

Epoch: 1, Loss: 3.0696821212768555


Processing epoch 00: 100%|█████████▉| 4846/4850 [22:24<00:01,  3.60it/s]

Epoch: 1, Loss: 2.8852322101593018


Processing epoch 00: 100%|█████████▉| 4847/4850 [22:25<00:00,  3.59it/s]

Epoch: 1, Loss: 2.5836758613586426


Processing epoch 00: 100%|█████████▉| 4848/4850 [22:25<00:00,  3.59it/s]

Epoch: 1, Loss: 3.0840744972229004


Processing epoch 00: 100%|█████████▉| 4849/4850 [22:25<00:00,  3.59it/s]

Epoch: 1, Loss: 2.9566197395324707


Processing epoch 00: 100%|██████████| 4850/4850 [22:25<00:00,  3.60it/s]

Epoch: 1, Loss: 2.7259433269500732



Processing epoch 01:   0%|          | 1/4850 [00:00<29:12,  2.77it/s]

Epoch: 2, Loss: 3.0096163749694824


Processing epoch 01:   0%|          | 2/4850 [00:00<25:04,  3.22it/s]

Epoch: 2, Loss: 2.7149415016174316


Processing epoch 01:   0%|          | 3/4850 [00:00<23:42,  3.41it/s]

Epoch: 2, Loss: 3.1367692947387695


Processing epoch 01:   0%|          | 4/4850 [00:01<23:13,  3.48it/s]

Epoch: 2, Loss: 2.4705018997192383


Processing epoch 01:   0%|          | 5/4850 [00:01<22:59,  3.51it/s]

Epoch: 2, Loss: 2.7881312370300293


Processing epoch 01:   0%|          | 6/4850 [00:01<22:45,  3.55it/s]

Epoch: 2, Loss: 3.1639859676361084


Processing epoch 01:   0%|          | 7/4850 [00:02<22:50,  3.53it/s]

Epoch: 2, Loss: 2.2689661979675293


Processing epoch 01:   0%|          | 8/4850 [00:02<22:47,  3.54it/s]

Epoch: 2, Loss: 2.7008328437805176


Processing epoch 01:   0%|          | 9/4850 [00:02<22:36,  3.57it/s]

Epoch: 2, Loss: 2.8133764266967773


Processing epoch 01:   0%|          | 10/4850 [00:02<22:25,  3.60it/s]

Epoch: 2, Loss: 2.6130475997924805


Processing epoch 01:   0%|          | 11/4850 [00:03<22:33,  3.58it/s]

Epoch: 2, Loss: 2.5759949684143066


Processing epoch 01:   0%|          | 12/4850 [00:03<22:28,  3.59it/s]

Epoch: 2, Loss: 2.1981382369995117


Processing epoch 01:   0%|          | 13/4850 [00:03<22:23,  3.60it/s]

Epoch: 2, Loss: 2.527925729751587


Processing epoch 01:   0%|          | 14/4850 [00:03<22:14,  3.62it/s]

Epoch: 2, Loss: 2.9765708446502686


Processing epoch 01:   0%|          | 15/4850 [00:04<22:16,  3.62it/s]

Epoch: 2, Loss: 2.091826915740967


Processing epoch 01:   0%|          | 16/4850 [00:04<22:22,  3.60it/s]

Epoch: 2, Loss: 3.211364269256592


Processing epoch 01:   0%|          | 17/4850 [00:04<22:18,  3.61it/s]

Epoch: 2, Loss: 2.8805394172668457


Processing epoch 01:   0%|          | 18/4850 [00:05<22:17,  3.61it/s]

Epoch: 2, Loss: 3.1507015228271484


Processing epoch 01:   0%|          | 19/4850 [00:05<23:40,  3.40it/s]

Epoch: 2, Loss: 2.373566150665283


Processing epoch 01:   0%|          | 20/4850 [00:05<23:46,  3.39it/s]

Epoch: 2, Loss: 2.2083773612976074


Processing epoch 01:   0%|          | 21/4850 [00:05<23:33,  3.42it/s]

Epoch: 2, Loss: 2.148573398590088


Processing epoch 01:   0%|          | 22/4850 [00:06<23:34,  3.41it/s]

Epoch: 2, Loss: 3.0181374549865723


Processing epoch 01:   0%|          | 23/4850 [00:06<23:48,  3.38it/s]

Epoch: 2, Loss: 2.4343159198760986


Processing epoch 01:   0%|          | 24/4850 [00:06<23:33,  3.41it/s]

Epoch: 2, Loss: 2.486319065093994


Processing epoch 01:   1%|          | 25/4850 [00:07<23:43,  3.39it/s]

Epoch: 2, Loss: 3.149271011352539


Processing epoch 01:   1%|          | 26/4850 [00:07<23:55,  3.36it/s]

Epoch: 2, Loss: 2.4610085487365723


Processing epoch 01:   1%|          | 27/4850 [00:07<23:54,  3.36it/s]

Epoch: 2, Loss: 2.7427682876586914


Processing epoch 01:   1%|          | 28/4850 [00:08<23:39,  3.40it/s]

Epoch: 2, Loss: 3.144228935241699


Processing epoch 01:   1%|          | 29/4850 [00:08<23:50,  3.37it/s]

Epoch: 2, Loss: 3.809288501739502


Processing epoch 01:   1%|          | 30/4850 [00:08<24:07,  3.33it/s]

Epoch: 2, Loss: 2.761923313140869


Processing epoch 01:   1%|          | 31/4850 [00:08<23:33,  3.41it/s]

Epoch: 2, Loss: 2.904428482055664


Processing epoch 01:   1%|          | 32/4850 [00:09<23:10,  3.47it/s]

Epoch: 2, Loss: 3.57189679145813


Processing epoch 01:   1%|          | 33/4850 [00:09<22:54,  3.51it/s]

Epoch: 2, Loss: 2.4832570552825928


Processing epoch 01:   1%|          | 34/4850 [00:09<22:41,  3.54it/s]

Epoch: 2, Loss: 2.674405097961426


Processing epoch 01:   1%|          | 35/4850 [00:10<22:27,  3.57it/s]

Epoch: 2, Loss: 3.5840959548950195


Processing epoch 01:   1%|          | 36/4850 [00:10<22:29,  3.57it/s]

Epoch: 2, Loss: 2.7324609756469727


Processing epoch 01:   1%|          | 37/4850 [00:10<22:28,  3.57it/s]

Epoch: 2, Loss: 2.844897747039795


Processing epoch 01:   1%|          | 38/4850 [00:10<22:21,  3.59it/s]

Epoch: 2, Loss: 3.1174674034118652


Processing epoch 01:   1%|          | 39/4850 [00:11<22:21,  3.59it/s]

Epoch: 2, Loss: 2.5826077461242676


Processing epoch 01:   1%|          | 40/4850 [00:11<22:25,  3.57it/s]

Epoch: 2, Loss: 3.7832465171813965


Processing epoch 01:   1%|          | 41/4850 [00:11<22:18,  3.59it/s]

Epoch: 2, Loss: 3.6499788761138916


Processing epoch 01:   1%|          | 42/4850 [00:12<22:15,  3.60it/s]

Epoch: 2, Loss: 2.6035566329956055


Processing epoch 01:   1%|          | 43/4850 [00:12<22:14,  3.60it/s]

Epoch: 2, Loss: 3.0948872566223145


Processing epoch 01:   1%|          | 44/4850 [00:12<22:12,  3.61it/s]

Epoch: 2, Loss: 3.163931369781494


Processing epoch 01:   1%|          | 45/4850 [00:12<22:14,  3.60it/s]

Epoch: 2, Loss: 2.698141098022461


Processing epoch 01:   1%|          | 46/4850 [00:13<22:23,  3.58it/s]

Epoch: 2, Loss: 3.0676393508911133


Processing epoch 01:   1%|          | 47/4850 [00:13<22:26,  3.57it/s]

Epoch: 2, Loss: 2.911288022994995


Processing epoch 01:   1%|          | 48/4850 [00:13<22:28,  3.56it/s]

Epoch: 2, Loss: 2.5297439098358154


Processing epoch 01:   1%|          | 49/4850 [00:13<22:27,  3.56it/s]

Epoch: 2, Loss: 2.7076940536499023


Processing epoch 01:   1%|          | 50/4850 [00:14<22:30,  3.55it/s]

Epoch: 2, Loss: 2.611210823059082


Processing epoch 01:   1%|          | 51/4850 [00:14<22:34,  3.54it/s]

Epoch: 2, Loss: 2.4893956184387207


Processing epoch 01:   1%|          | 52/4850 [00:14<22:34,  3.54it/s]

Epoch: 2, Loss: 2.3921589851379395


Processing epoch 01:   1%|          | 53/4850 [00:15<22:35,  3.54it/s]

Epoch: 2, Loss: 2.4676578044891357


Processing epoch 01:   1%|          | 54/4850 [00:15<22:34,  3.54it/s]

Epoch: 2, Loss: 3.256344795227051


Processing epoch 01:   1%|          | 55/4850 [00:15<22:34,  3.54it/s]

Epoch: 2, Loss: 4.548624038696289


Processing epoch 01:   1%|          | 56/4850 [00:15<22:27,  3.56it/s]

Epoch: 2, Loss: 3.622887134552002


Processing epoch 01:   1%|          | 57/4850 [00:16<22:31,  3.55it/s]

Epoch: 2, Loss: 2.6631827354431152


Processing epoch 01:   1%|          | 58/4850 [00:16<22:38,  3.53it/s]

Epoch: 2, Loss: 2.9147098064422607


Processing epoch 01:   1%|          | 59/4850 [00:16<22:35,  3.53it/s]

Epoch: 2, Loss: 2.757296323776245


Processing epoch 01:   1%|          | 60/4850 [00:17<22:26,  3.56it/s]

Epoch: 2, Loss: 3.162529468536377


Processing epoch 01:   1%|▏         | 61/4850 [00:17<22:22,  3.57it/s]

Epoch: 2, Loss: 2.618195056915283


Processing epoch 01:   1%|▏         | 62/4850 [00:17<22:25,  3.56it/s]

Epoch: 2, Loss: 2.870499610900879


Processing epoch 01:   1%|▏         | 63/4850 [00:17<22:23,  3.56it/s]

Epoch: 2, Loss: 2.877092123031616


Processing epoch 01:   1%|▏         | 64/4850 [00:18<22:27,  3.55it/s]

Epoch: 2, Loss: 2.3735299110412598


Processing epoch 01:   1%|▏         | 65/4850 [00:18<22:22,  3.56it/s]

Epoch: 2, Loss: 2.4213216304779053


Processing epoch 01:   1%|▏         | 66/4850 [00:18<22:44,  3.51it/s]

Epoch: 2, Loss: 2.922305107116699


Processing epoch 01:   1%|▏         | 67/4850 [00:19<22:56,  3.47it/s]

Epoch: 2, Loss: 2.2056050300598145


Processing epoch 01:   1%|▏         | 68/4850 [00:19<23:07,  3.45it/s]

Epoch: 2, Loss: 2.538804531097412


Processing epoch 01:   1%|▏         | 69/4850 [00:19<23:07,  3.45it/s]

Epoch: 2, Loss: 2.8257181644439697


Processing epoch 01:   1%|▏         | 70/4850 [00:19<23:10,  3.44it/s]

Epoch: 2, Loss: 2.706540822982788


Processing epoch 01:   1%|▏         | 71/4850 [00:20<23:10,  3.44it/s]

Epoch: 2, Loss: 2.779531955718994


Processing epoch 01:   1%|▏         | 72/4850 [00:20<23:49,  3.34it/s]

Epoch: 2, Loss: 2.990307092666626


Processing epoch 01:   2%|▏         | 73/4850 [00:20<23:54,  3.33it/s]

Epoch: 2, Loss: 3.2606239318847656


Processing epoch 01:   2%|▏         | 74/4850 [00:21<23:55,  3.33it/s]

Epoch: 2, Loss: 4.060481071472168


Processing epoch 01:   2%|▏         | 75/4850 [00:21<23:52,  3.33it/s]

Epoch: 2, Loss: 2.6774773597717285


Processing epoch 01:   2%|▏         | 76/4850 [00:21<23:41,  3.36it/s]

Epoch: 2, Loss: 2.7557199001312256


Processing epoch 01:   2%|▏         | 77/4850 [00:22<24:10,  3.29it/s]

Epoch: 2, Loss: 2.70414662361145


Processing epoch 01:   2%|▏         | 78/4850 [00:22<23:36,  3.37it/s]

Epoch: 2, Loss: 2.959188938140869


Processing epoch 01:   2%|▏         | 79/4850 [00:22<23:11,  3.43it/s]

Epoch: 2, Loss: 2.4873297214508057


Processing epoch 01:   2%|▏         | 80/4850 [00:22<22:55,  3.47it/s]

Epoch: 2, Loss: 2.7294833660125732


Processing epoch 01:   2%|▏         | 81/4850 [00:23<22:55,  3.47it/s]

Epoch: 2, Loss: 3.22786808013916


Processing epoch 01:   2%|▏         | 82/4850 [00:23<22:49,  3.48it/s]

Epoch: 2, Loss: 3.4877572059631348


Processing epoch 01:   2%|▏         | 83/4850 [00:23<22:39,  3.51it/s]

Epoch: 2, Loss: 2.602125883102417


Processing epoch 01:   2%|▏         | 84/4850 [00:24<22:33,  3.52it/s]

Epoch: 2, Loss: 2.7873728275299072


Processing epoch 01:   2%|▏         | 85/4850 [00:24<22:27,  3.54it/s]

Epoch: 2, Loss: 2.9160776138305664


Processing epoch 01:   2%|▏         | 86/4850 [00:24<22:27,  3.54it/s]

Epoch: 2, Loss: 2.2804670333862305


Processing epoch 01:   2%|▏         | 87/4850 [00:24<22:39,  3.50it/s]

Epoch: 2, Loss: 2.5810306072235107


Processing epoch 01:   2%|▏         | 88/4850 [00:25<22:08,  3.58it/s]

Epoch: 2, Loss: 2.9040517807006836


Processing epoch 01:   2%|▏         | 89/4850 [00:25<22:06,  3.59it/s]

Epoch: 2, Loss: 3.86234712600708


Processing epoch 01:   2%|▏         | 90/4850 [00:25<22:16,  3.56it/s]

Epoch: 2, Loss: 3.204692840576172


Processing epoch 01:   2%|▏         | 91/4850 [00:26<22:25,  3.54it/s]

Epoch: 2, Loss: 2.1363162994384766


Processing epoch 01:   2%|▏         | 92/4850 [00:26<22:25,  3.54it/s]

Epoch: 2, Loss: 2.303896903991699


Processing epoch 01:   2%|▏         | 93/4850 [00:26<22:21,  3.55it/s]

Epoch: 2, Loss: 2.6034536361694336


Processing epoch 01:   2%|▏         | 94/4850 [00:26<22:22,  3.54it/s]

Epoch: 2, Loss: 2.846310615539551


Processing epoch 01:   2%|▏         | 95/4850 [00:27<22:13,  3.57it/s]

Epoch: 2, Loss: 3.8136067390441895


Processing epoch 01:   2%|▏         | 96/4850 [00:27<22:17,  3.55it/s]

Epoch: 2, Loss: 2.7318196296691895


Processing epoch 01:   2%|▏         | 97/4850 [00:27<22:22,  3.54it/s]

Epoch: 2, Loss: 3.2057418823242188


Processing epoch 01:   2%|▏         | 98/4850 [00:27<22:38,  3.50it/s]

Epoch: 2, Loss: 2.815793514251709


Processing epoch 01:   2%|▏         | 99/4850 [00:28<22:36,  3.50it/s]

Epoch: 2, Loss: 2.8522119522094727


Processing epoch 01:   2%|▏         | 100/4850 [00:28<22:34,  3.51it/s]

Epoch: 2, Loss: 2.6915903091430664


Processing epoch 01:   2%|▏         | 101/4850 [00:28<22:35,  3.50it/s]

Epoch: 2, Loss: 2.671311378479004


Processing epoch 01:   2%|▏         | 102/4850 [00:29<22:31,  3.51it/s]

Epoch: 2, Loss: 2.883057117462158


Processing epoch 01:   2%|▏         | 103/4850 [00:29<22:28,  3.52it/s]

Epoch: 2, Loss: 2.2271065711975098


Processing epoch 01:   2%|▏         | 104/4850 [00:29<22:35,  3.50it/s]

Epoch: 2, Loss: 2.603111743927002


Processing epoch 01:   2%|▏         | 105/4850 [00:29<22:28,  3.52it/s]

Epoch: 2, Loss: 2.972332000732422


Processing epoch 01:   2%|▏         | 106/4850 [00:30<22:26,  3.52it/s]

Epoch: 2, Loss: 2.5273494720458984


Processing epoch 01:   2%|▏         | 107/4850 [00:30<22:43,  3.48it/s]

Epoch: 2, Loss: 2.371346950531006


Processing epoch 01:   2%|▏         | 108/4850 [00:30<22:43,  3.48it/s]

Epoch: 2, Loss: 2.9284045696258545


Processing epoch 01:   2%|▏         | 109/4850 [00:31<22:47,  3.47it/s]

Epoch: 2, Loss: 2.6147754192352295


Processing epoch 01:   2%|▏         | 110/4850 [00:31<22:34,  3.50it/s]

Epoch: 2, Loss: 2.7939319610595703


Processing epoch 01:   2%|▏         | 111/4850 [00:31<22:32,  3.50it/s]

Epoch: 2, Loss: 3.1238698959350586


Processing epoch 01:   2%|▏         | 112/4850 [00:31<22:28,  3.51it/s]

Epoch: 2, Loss: 2.793463706970215


Processing epoch 01:   2%|▏         | 113/4850 [00:32<22:22,  3.53it/s]

Epoch: 2, Loss: 2.66969633102417


Processing epoch 01:   2%|▏         | 114/4850 [00:32<22:27,  3.51it/s]

Epoch: 2, Loss: 2.1985831260681152


Processing epoch 01:   2%|▏         | 115/4850 [00:32<22:45,  3.47it/s]

Epoch: 2, Loss: 2.9586825370788574


Processing epoch 01:   2%|▏         | 116/4850 [00:33<22:40,  3.48it/s]

Epoch: 2, Loss: 2.5014898777008057


Processing epoch 01:   2%|▏         | 117/4850 [00:33<22:51,  3.45it/s]

Epoch: 2, Loss: 2.8199081420898438


Processing epoch 01:   2%|▏         | 118/4850 [00:33<23:02,  3.42it/s]

Epoch: 2, Loss: 2.7047502994537354


Processing epoch 01:   2%|▏         | 119/4850 [00:34<23:15,  3.39it/s]

Epoch: 2, Loss: 2.3420445919036865


Processing epoch 01:   2%|▏         | 120/4850 [00:34<23:17,  3.38it/s]

Epoch: 2, Loss: 3.0984036922454834


Processing epoch 01:   2%|▏         | 121/4850 [00:34<23:22,  3.37it/s]

Epoch: 2, Loss: 2.783459424972534


Processing epoch 01:   3%|▎         | 122/4850 [00:34<23:14,  3.39it/s]

Epoch: 2, Loss: 2.6406161785125732


Processing epoch 01:   3%|▎         | 123/4850 [00:35<23:33,  3.34it/s]

Epoch: 2, Loss: 2.7144947052001953


Processing epoch 01:   3%|▎         | 124/4850 [00:35<23:23,  3.37it/s]

Epoch: 2, Loss: 2.4792675971984863


Processing epoch 01:   3%|▎         | 125/4850 [00:35<23:31,  3.35it/s]

Epoch: 2, Loss: 3.2524585723876953


Processing epoch 01:   3%|▎         | 126/4850 [00:36<22:59,  3.42it/s]

Epoch: 2, Loss: 2.978853702545166


Processing epoch 01:   3%|▎         | 127/4850 [00:36<22:45,  3.46it/s]

Epoch: 2, Loss: 2.17657732963562


Processing epoch 01:   3%|▎         | 128/4850 [00:36<22:30,  3.50it/s]

Epoch: 2, Loss: 3.4315438270568848


Processing epoch 01:   3%|▎         | 129/4850 [00:36<22:24,  3.51it/s]

Epoch: 2, Loss: 3.548112630844116


Processing epoch 01:   3%|▎         | 130/4850 [00:37<22:13,  3.54it/s]

Epoch: 2, Loss: 2.8355650901794434


Processing epoch 01:   3%|▎         | 131/4850 [00:37<22:06,  3.56it/s]

Epoch: 2, Loss: 3.5786941051483154


Processing epoch 01:   3%|▎         | 132/4850 [00:37<22:04,  3.56it/s]

Epoch: 2, Loss: 2.785452365875244


Processing epoch 01:   3%|▎         | 133/4850 [00:38<21:57,  3.58it/s]

Epoch: 2, Loss: 3.0627970695495605


Processing epoch 01:   3%|▎         | 134/4850 [00:38<21:56,  3.58it/s]

Epoch: 2, Loss: 2.8208565711975098


Processing epoch 01:   3%|▎         | 135/4850 [00:38<21:50,  3.60it/s]

Epoch: 2, Loss: 2.970101833343506


Processing epoch 01:   3%|▎         | 136/4850 [00:38<21:52,  3.59it/s]

Epoch: 2, Loss: 3.150998830795288


Processing epoch 01:   3%|▎         | 137/4850 [00:39<21:54,  3.58it/s]

Epoch: 2, Loss: 2.8758630752563477


Processing epoch 01:   3%|▎         | 138/4850 [00:39<21:58,  3.57it/s]

Epoch: 2, Loss: 2.7576565742492676


Processing epoch 01:   3%|▎         | 139/4850 [00:39<21:57,  3.58it/s]

Epoch: 2, Loss: 2.7856059074401855


Processing epoch 01:   3%|▎         | 140/4850 [00:40<21:54,  3.58it/s]

Epoch: 2, Loss: 3.9322509765625


Processing epoch 01:   3%|▎         | 141/4850 [00:40<21:55,  3.58it/s]

Epoch: 2, Loss: 3.0175623893737793


Processing epoch 01:   3%|▎         | 142/4850 [00:40<21:52,  3.59it/s]

Epoch: 2, Loss: 2.63464617729187


Processing epoch 01:   3%|▎         | 143/4850 [00:40<21:56,  3.58it/s]

Epoch: 2, Loss: 2.623349189758301


Processing epoch 01:   3%|▎         | 144/4850 [00:41<22:03,  3.55it/s]

Epoch: 2, Loss: 3.1822686195373535


Processing epoch 01:   3%|▎         | 145/4850 [00:41<22:05,  3.55it/s]

Epoch: 2, Loss: 2.586838483810425


Processing epoch 01:   3%|▎         | 146/4850 [00:41<22:01,  3.56it/s]

Epoch: 2, Loss: 2.9685211181640625


Processing epoch 01:   3%|▎         | 147/4850 [00:41<21:53,  3.58it/s]

Epoch: 2, Loss: 3.285797119140625


Processing epoch 01:   3%|▎         | 148/4850 [00:42<21:52,  3.58it/s]

Epoch: 2, Loss: 2.324185848236084


Processing epoch 01:   3%|▎         | 149/4850 [00:42<21:52,  3.58it/s]

Epoch: 2, Loss: 2.953904628753662


Processing epoch 01:   3%|▎         | 150/4850 [00:42<21:55,  3.57it/s]

Epoch: 2, Loss: 2.970484495162964


Processing epoch 01:   3%|▎         | 151/4850 [00:43<21:53,  3.58it/s]

Epoch: 2, Loss: 3.673774242401123


Processing epoch 01:   3%|▎         | 152/4850 [00:43<21:53,  3.58it/s]

Epoch: 2, Loss: 2.9886221885681152


Processing epoch 01:   3%|▎         | 153/4850 [00:43<21:49,  3.59it/s]

Epoch: 2, Loss: 3.261537790298462


Processing epoch 01:   3%|▎         | 154/4850 [00:43<21:50,  3.58it/s]

Epoch: 2, Loss: 2.2700467109680176


Processing epoch 01:   3%|▎         | 155/4850 [00:44<21:50,  3.58it/s]

Epoch: 2, Loss: 2.899190902709961


Processing epoch 01:   3%|▎         | 156/4850 [00:44<21:51,  3.58it/s]

Epoch: 2, Loss: 2.268326759338379


Processing epoch 01:   3%|▎         | 157/4850 [00:44<21:47,  3.59it/s]

Epoch: 2, Loss: 2.667154312133789


Processing epoch 01:   3%|▎         | 158/4850 [00:45<21:53,  3.57it/s]

Epoch: 2, Loss: 3.1038918495178223


Processing epoch 01:   3%|▎         | 159/4850 [00:45<21:49,  3.58it/s]

Epoch: 2, Loss: 2.4220499992370605


Processing epoch 01:   3%|▎         | 160/4850 [00:45<21:52,  3.57it/s]

Epoch: 2, Loss: 2.674771308898926


Processing epoch 01:   3%|▎         | 161/4850 [00:45<21:53,  3.57it/s]

Epoch: 2, Loss: 3.215449810028076


Processing epoch 01:   3%|▎         | 162/4850 [00:46<22:33,  3.46it/s]

Epoch: 2, Loss: 2.733218193054199


Processing epoch 01:   3%|▎         | 163/4850 [00:46<22:24,  3.49it/s]

Epoch: 2, Loss: 2.567823886871338


Processing epoch 01:   3%|▎         | 164/4850 [00:46<22:17,  3.50it/s]

Epoch: 2, Loss: 2.76599383354187


Processing epoch 01:   3%|▎         | 165/4850 [00:47<22:09,  3.52it/s]

Epoch: 2, Loss: 2.8830182552337646


Processing epoch 01:   3%|▎         | 166/4850 [00:47<22:40,  3.44it/s]

Epoch: 2, Loss: 2.5302772521972656


Processing epoch 01:   3%|▎         | 167/4850 [00:47<22:46,  3.43it/s]

Epoch: 2, Loss: 3.4007821083068848


Processing epoch 01:   3%|▎         | 168/4850 [00:47<22:59,  3.39it/s]

Epoch: 2, Loss: 3.5963666439056396


Processing epoch 01:   3%|▎         | 169/4850 [00:48<23:12,  3.36it/s]

Epoch: 2, Loss: 2.439225673675537


Processing epoch 01:   4%|▎         | 170/4850 [00:48<23:11,  3.36it/s]

Epoch: 2, Loss: 2.473787784576416


Processing epoch 01:   4%|▎         | 171/4850 [00:48<23:02,  3.38it/s]

Epoch: 2, Loss: 3.066688060760498


Processing epoch 01:   4%|▎         | 172/4850 [00:49<22:48,  3.42it/s]

Epoch: 2, Loss: 2.7928361892700195


Processing epoch 01:   4%|▎         | 173/4850 [00:49<23:04,  3.38it/s]

Epoch: 2, Loss: 2.696092128753662


Processing epoch 01:   4%|▎         | 174/4850 [00:49<23:21,  3.34it/s]

Epoch: 2, Loss: 2.9620494842529297


Processing epoch 01:   4%|▎         | 175/4850 [00:50<22:59,  3.39it/s]

Epoch: 2, Loss: 2.6513259410858154


Processing epoch 01:   4%|▎         | 176/4850 [00:50<22:34,  3.45it/s]

Epoch: 2, Loss: 2.7915968894958496


Processing epoch 01:   4%|▎         | 177/4850 [00:50<22:14,  3.50it/s]

Epoch: 2, Loss: 3.38023042678833


Processing epoch 01:   4%|▎         | 178/4850 [00:50<21:59,  3.54it/s]

Epoch: 2, Loss: 2.782468318939209


Processing epoch 01:   4%|▎         | 179/4850 [00:51<21:47,  3.57it/s]

Epoch: 2, Loss: 2.525371551513672


Processing epoch 01:   4%|▎         | 180/4850 [00:51<21:49,  3.57it/s]

Epoch: 2, Loss: 2.5964066982269287


Processing epoch 01:   4%|▎         | 181/4850 [00:51<21:48,  3.57it/s]

Epoch: 2, Loss: 2.9521021842956543


Processing epoch 01:   4%|▍         | 182/4850 [00:51<21:54,  3.55it/s]

Epoch: 2, Loss: 2.769069194793701


Processing epoch 01:   4%|▍         | 183/4850 [00:52<21:58,  3.54it/s]

Epoch: 2, Loss: 2.99127197265625


Processing epoch 01:   4%|▍         | 184/4850 [00:52<22:32,  3.45it/s]

Epoch: 2, Loss: 2.773406505584717


Processing epoch 01:   4%|▍         | 185/4850 [00:52<22:44,  3.42it/s]

Epoch: 2, Loss: 2.557138442993164


Processing epoch 01:   4%|▍         | 186/4850 [00:53<22:26,  3.46it/s]

Epoch: 2, Loss: 2.9350991249084473


Processing epoch 01:   4%|▍         | 187/4850 [00:53<22:47,  3.41it/s]

Epoch: 2, Loss: 2.733410120010376


Processing epoch 01:   4%|▍         | 188/4850 [00:53<22:47,  3.41it/s]

Epoch: 2, Loss: 2.968107223510742


Processing epoch 01:   4%|▍         | 189/4850 [00:54<22:43,  3.42it/s]

Epoch: 2, Loss: 3.3886494636535645


Processing epoch 01:   4%|▍         | 190/4850 [00:54<23:07,  3.36it/s]

Epoch: 2, Loss: 2.4711599349975586


Processing epoch 01:   4%|▍         | 191/4850 [00:54<22:52,  3.39it/s]

Epoch: 2, Loss: 3.601944923400879


Processing epoch 01:   4%|▍         | 192/4850 [00:54<22:55,  3.39it/s]

Epoch: 2, Loss: 2.914754629135132


Processing epoch 01:   4%|▍         | 193/4850 [00:55<23:11,  3.35it/s]

Epoch: 2, Loss: 2.764608860015869


Processing epoch 01:   4%|▍         | 194/4850 [00:55<23:24,  3.31it/s]

Epoch: 2, Loss: 2.8936901092529297


Processing epoch 01:   4%|▍         | 195/4850 [00:55<23:24,  3.31it/s]

Epoch: 2, Loss: 2.4858479499816895


Processing epoch 01:   4%|▍         | 196/4850 [00:56<22:52,  3.39it/s]

Epoch: 2, Loss: 2.4756295680999756


Processing epoch 01:   4%|▍         | 197/4850 [00:56<22:31,  3.44it/s]

Epoch: 2, Loss: 2.891880512237549


Processing epoch 01:   4%|▍         | 198/4850 [00:56<22:06,  3.51it/s]

Epoch: 2, Loss: 3.5457091331481934


Processing epoch 01:   4%|▍         | 199/4850 [00:56<22:01,  3.52it/s]

Epoch: 2, Loss: 2.868986129760742


Processing epoch 01:   4%|▍         | 200/4850 [00:57<21:54,  3.54it/s]

Epoch: 2, Loss: 2.660078525543213


Processing epoch 01:   4%|▍         | 201/4850 [00:57<21:49,  3.55it/s]

Epoch: 2, Loss: 3.2095179557800293


Processing epoch 01:   4%|▍         | 202/4850 [00:57<21:41,  3.57it/s]

Epoch: 2, Loss: 2.749741554260254


Processing epoch 01:   4%|▍         | 203/4850 [00:58<21:56,  3.53it/s]

Epoch: 2, Loss: 2.3316526412963867


Processing epoch 01:   4%|▍         | 204/4850 [00:58<21:48,  3.55it/s]

Epoch: 2, Loss: 2.713352680206299


Processing epoch 01:   4%|▍         | 205/4850 [00:58<21:36,  3.58it/s]

Epoch: 2, Loss: 3.3659815788269043


Processing epoch 01:   4%|▍         | 206/4850 [00:58<21:35,  3.59it/s]

Epoch: 2, Loss: 2.8828539848327637


Processing epoch 01:   4%|▍         | 207/4850 [00:59<21:38,  3.58it/s]

Epoch: 2, Loss: 2.90948486328125


Processing epoch 01:   4%|▍         | 208/4850 [00:59<21:33,  3.59it/s]

Epoch: 2, Loss: 2.779709815979004


Processing epoch 01:   4%|▍         | 209/4850 [00:59<21:31,  3.59it/s]

Epoch: 2, Loss: 3.638826370239258


Processing epoch 01:   4%|▍         | 210/4850 [01:00<21:51,  3.54it/s]

Epoch: 2, Loss: 2.793203830718994


Processing epoch 01:   4%|▍         | 211/4850 [01:00<21:59,  3.52it/s]

Epoch: 2, Loss: 3.1989450454711914


Processing epoch 01:   4%|▍         | 212/4850 [01:00<22:28,  3.44it/s]

Epoch: 2, Loss: 3.52142333984375


Processing epoch 01:   4%|▍         | 213/4850 [01:00<22:16,  3.47it/s]

Epoch: 2, Loss: 2.438467025756836


Processing epoch 01:   4%|▍         | 214/4850 [01:01<22:18,  3.46it/s]

Epoch: 2, Loss: 2.5652031898498535


Processing epoch 01:   4%|▍         | 215/4850 [01:01<22:40,  3.41it/s]

Epoch: 2, Loss: 2.993701934814453


Processing epoch 01:   4%|▍         | 216/4850 [01:01<22:47,  3.39it/s]

Epoch: 2, Loss: 2.506927251815796


Processing epoch 01:   4%|▍         | 217/4850 [01:02<22:33,  3.42it/s]

Epoch: 2, Loss: 2.926236629486084


Processing epoch 01:   4%|▍         | 218/4850 [01:02<22:38,  3.41it/s]

Epoch: 2, Loss: 3.337268352508545


Processing epoch 01:   5%|▍         | 219/4850 [01:02<22:49,  3.38it/s]

Epoch: 2, Loss: 3.437312602996826


Processing epoch 01:   5%|▍         | 220/4850 [01:02<22:32,  3.42it/s]

Epoch: 2, Loss: 2.579463481903076


Processing epoch 01:   5%|▍         | 221/4850 [01:03<23:03,  3.35it/s]

Epoch: 2, Loss: 2.6189627647399902


Processing epoch 01:   5%|▍         | 222/4850 [01:03<22:33,  3.42it/s]

Epoch: 2, Loss: 3.5065338611602783


Processing epoch 01:   5%|▍         | 223/4850 [01:03<22:08,  3.48it/s]

Epoch: 2, Loss: 2.889268398284912


Processing epoch 01:   5%|▍         | 224/4850 [01:04<21:50,  3.53it/s]

Epoch: 2, Loss: 4.481980323791504


Processing epoch 01:   5%|▍         | 225/4850 [01:04<21:53,  3.52it/s]

Epoch: 2, Loss: 3.439359188079834


Processing epoch 01:   5%|▍         | 226/4850 [01:04<21:45,  3.54it/s]

Epoch: 2, Loss: 2.6209716796875


Processing epoch 01:   5%|▍         | 227/4850 [01:04<21:43,  3.55it/s]

Epoch: 2, Loss: 2.553893804550171


Processing epoch 01:   5%|▍         | 228/4850 [01:05<21:44,  3.54it/s]

Epoch: 2, Loss: 3.1161417961120605


Processing epoch 01:   5%|▍         | 229/4850 [01:05<21:41,  3.55it/s]

Epoch: 2, Loss: 2.4717154502868652


Processing epoch 01:   5%|▍         | 230/4850 [01:05<21:27,  3.59it/s]

Epoch: 2, Loss: 3.8659253120422363


Processing epoch 01:   5%|▍         | 231/4850 [01:06<21:23,  3.60it/s]

Epoch: 2, Loss: 3.282393217086792


Processing epoch 01:   5%|▍         | 232/4850 [01:06<21:17,  3.62it/s]

Epoch: 2, Loss: 3.873382091522217


Processing epoch 01:   5%|▍         | 233/4850 [01:06<21:15,  3.62it/s]

Epoch: 2, Loss: 3.1177587509155273


Processing epoch 01:   5%|▍         | 234/4850 [01:06<21:16,  3.62it/s]

Epoch: 2, Loss: 3.1479897499084473


Processing epoch 01:   5%|▍         | 235/4850 [01:07<21:17,  3.61it/s]

Epoch: 2, Loss: 2.652648687362671


Processing epoch 01:   5%|▍         | 236/4850 [01:07<21:20,  3.60it/s]

Epoch: 2, Loss: 2.614421844482422


Processing epoch 01:   5%|▍         | 237/4850 [01:07<21:23,  3.59it/s]

Epoch: 2, Loss: 3.1764206886291504


Processing epoch 01:   5%|▍         | 238/4850 [01:08<21:22,  3.60it/s]

Epoch: 2, Loss: 2.082350492477417


Processing epoch 01:   5%|▍         | 239/4850 [01:08<21:26,  3.58it/s]

Epoch: 2, Loss: 2.8309121131896973


Processing epoch 01:   5%|▍         | 240/4850 [01:08<21:24,  3.59it/s]

Epoch: 2, Loss: 2.391869068145752


Processing epoch 01:   5%|▍         | 241/4850 [01:08<21:25,  3.58it/s]

Epoch: 2, Loss: 2.543666362762451


Processing epoch 01:   5%|▍         | 242/4850 [01:09<21:24,  3.59it/s]

Epoch: 2, Loss: 2.998333692550659


Processing epoch 01:   5%|▌         | 243/4850 [01:09<21:31,  3.57it/s]

Epoch: 2, Loss: 2.703687906265259


Processing epoch 01:   5%|▌         | 244/4850 [01:09<21:36,  3.55it/s]

Epoch: 2, Loss: 3.381131887435913


Processing epoch 01:   5%|▌         | 245/4850 [01:09<21:41,  3.54it/s]

Epoch: 2, Loss: 2.284104824066162


Processing epoch 01:   5%|▌         | 246/4850 [01:10<21:24,  3.59it/s]

Epoch: 2, Loss: 3.9787356853485107


Processing epoch 01:   5%|▌         | 247/4850 [01:10<21:25,  3.58it/s]

Epoch: 2, Loss: 2.5211374759674072


Processing epoch 01:   5%|▌         | 248/4850 [01:10<21:26,  3.58it/s]

Epoch: 2, Loss: 3.139301300048828


Processing epoch 01:   5%|▌         | 249/4850 [01:11<21:19,  3.60it/s]

Epoch: 2, Loss: 3.0967278480529785


Processing epoch 01:   5%|▌         | 250/4850 [01:11<21:22,  3.59it/s]

Epoch: 2, Loss: 2.746068000793457


Processing epoch 01:   5%|▌         | 251/4850 [01:11<21:18,  3.60it/s]

Epoch: 2, Loss: 2.499610424041748


Processing epoch 01:   5%|▌         | 252/4850 [01:11<21:20,  3.59it/s]

Epoch: 2, Loss: 2.5924949645996094


Processing epoch 01:   5%|▌         | 253/4850 [01:12<21:22,  3.58it/s]

Epoch: 2, Loss: 2.586707353591919


Processing epoch 01:   5%|▌         | 254/4850 [01:12<21:23,  3.58it/s]

Epoch: 2, Loss: 3.2775020599365234


Processing epoch 01:   5%|▌         | 255/4850 [01:12<21:21,  3.59it/s]

Epoch: 2, Loss: 3.119649887084961


Processing epoch 01:   5%|▌         | 256/4850 [01:13<21:20,  3.59it/s]

Epoch: 2, Loss: 2.455923557281494


Processing epoch 01:   5%|▌         | 257/4850 [01:13<21:26,  3.57it/s]

Epoch: 2, Loss: 2.8870017528533936


Processing epoch 01:   5%|▌         | 258/4850 [01:13<21:43,  3.52it/s]

Epoch: 2, Loss: 3.396557569503784


Processing epoch 01:   5%|▌         | 259/4850 [01:13<22:10,  3.45it/s]

Epoch: 2, Loss: 2.9835920333862305


Processing epoch 01:   5%|▌         | 260/4850 [01:14<22:10,  3.45it/s]

Epoch: 2, Loss: 3.033649444580078


Processing epoch 01:   5%|▌         | 261/4850 [01:14<21:57,  3.48it/s]

Epoch: 2, Loss: 3.251924514770508


Processing epoch 01:   5%|▌         | 262/4850 [01:14<22:33,  3.39it/s]

Epoch: 2, Loss: 2.6301302909851074


Processing epoch 01:   5%|▌         | 263/4850 [01:15<22:17,  3.43it/s]

Epoch: 2, Loss: 2.1462132930755615


Processing epoch 01:   5%|▌         | 264/4850 [01:15<22:31,  3.39it/s]

Epoch: 2, Loss: 2.739096164703369


Processing epoch 01:   5%|▌         | 265/4850 [01:15<22:51,  3.34it/s]

Epoch: 2, Loss: 2.5260446071624756


Processing epoch 01:   5%|▌         | 266/4850 [01:15<22:41,  3.37it/s]

Epoch: 2, Loss: 3.119530200958252


Processing epoch 01:   6%|▌         | 267/4850 [01:16<22:50,  3.34it/s]

Epoch: 2, Loss: 2.989959239959717


Processing epoch 01:   6%|▌         | 268/4850 [01:16<22:36,  3.38it/s]

Epoch: 2, Loss: 2.866123676300049


Processing epoch 01:   6%|▌         | 269/4850 [01:16<22:26,  3.40it/s]

Epoch: 2, Loss: 3.558683156967163


Processing epoch 01:   6%|▌         | 270/4850 [01:17<22:22,  3.41it/s]

Epoch: 2, Loss: 2.745457172393799


Processing epoch 01:   6%|▌         | 271/4850 [01:17<22:14,  3.43it/s]

Epoch: 2, Loss: 2.9240622520446777


Processing epoch 01:   6%|▌         | 272/4850 [01:17<22:03,  3.46it/s]

Epoch: 2, Loss: 2.5405142307281494


Processing epoch 01:   6%|▌         | 273/4850 [01:18<21:55,  3.48it/s]

Epoch: 2, Loss: 2.98551344871521


Processing epoch 01:   6%|▌         | 274/4850 [01:18<21:48,  3.50it/s]

Epoch: 2, Loss: 2.9955894947052


Processing epoch 01:   6%|▌         | 275/4850 [01:18<21:37,  3.53it/s]

Epoch: 2, Loss: 3.2265939712524414


Processing epoch 01:   6%|▌         | 276/4850 [01:18<21:32,  3.54it/s]

Epoch: 2, Loss: 2.045228958129883


Processing epoch 01:   6%|▌         | 277/4850 [01:19<21:29,  3.55it/s]

Epoch: 2, Loss: 3.2694201469421387


Processing epoch 01:   6%|▌         | 278/4850 [01:19<21:29,  3.55it/s]

Epoch: 2, Loss: 2.851624011993408


Processing epoch 01:   6%|▌         | 279/4850 [01:19<21:26,  3.55it/s]

Epoch: 2, Loss: 2.7296645641326904


Processing epoch 01:   6%|▌         | 280/4850 [01:19<21:24,  3.56it/s]

Epoch: 2, Loss: 2.371591567993164


Processing epoch 01:   6%|▌         | 281/4850 [01:20<21:22,  3.56it/s]

Epoch: 2, Loss: 2.4453492164611816


Processing epoch 01:   6%|▌         | 282/4850 [01:20<21:15,  3.58it/s]

Epoch: 2, Loss: 2.7732977867126465


Processing epoch 01:   6%|▌         | 283/4850 [01:20<21:12,  3.59it/s]

Epoch: 2, Loss: 3.2293992042541504


Processing epoch 01:   6%|▌         | 284/4850 [01:21<21:10,  3.59it/s]

Epoch: 2, Loss: 3.704012155532837


Processing epoch 01:   6%|▌         | 285/4850 [01:21<21:11,  3.59it/s]

Epoch: 2, Loss: 2.59515643119812


Processing epoch 01:   6%|▌         | 286/4850 [01:21<21:09,  3.60it/s]

Epoch: 2, Loss: 3.068553924560547


Processing epoch 01:   6%|▌         | 287/4850 [01:21<21:03,  3.61it/s]

Epoch: 2, Loss: 3.1213061809539795


Processing epoch 01:   6%|▌         | 288/4850 [01:22<21:00,  3.62it/s]

Epoch: 2, Loss: 2.6877846717834473


Processing epoch 01:   6%|▌         | 289/4850 [01:22<21:06,  3.60it/s]

Epoch: 2, Loss: 2.9437875747680664


Processing epoch 01:   6%|▌         | 290/4850 [01:22<21:07,  3.60it/s]

Epoch: 2, Loss: 2.7973155975341797


Processing epoch 01:   6%|▌         | 291/4850 [01:23<21:20,  3.56it/s]

Epoch: 2, Loss: 2.690640449523926


Processing epoch 01:   6%|▌         | 292/4850 [01:23<21:34,  3.52it/s]

Epoch: 2, Loss: 3.1638548374176025


Processing epoch 01:   6%|▌         | 293/4850 [01:23<21:24,  3.55it/s]

Epoch: 2, Loss: 2.878831386566162


Processing epoch 01:   6%|▌         | 294/4850 [01:23<21:20,  3.56it/s]

Epoch: 2, Loss: 2.6762566566467285


Processing epoch 01:   6%|▌         | 295/4850 [01:24<21:14,  3.57it/s]

Epoch: 2, Loss: 2.4825899600982666


Processing epoch 01:   6%|▌         | 296/4850 [01:24<21:10,  3.58it/s]

Epoch: 2, Loss: 2.637263774871826


Processing epoch 01:   6%|▌         | 297/4850 [01:24<21:05,  3.60it/s]

Epoch: 2, Loss: 3.4869067668914795


Processing epoch 01:   6%|▌         | 298/4850 [01:24<21:02,  3.60it/s]

Epoch: 2, Loss: 2.5541582107543945


Processing epoch 01:   6%|▌         | 299/4850 [01:25<21:02,  3.61it/s]

Epoch: 2, Loss: 2.55186128616333


Processing epoch 01:   6%|▌         | 300/4850 [01:25<21:07,  3.59it/s]

Epoch: 2, Loss: 2.6854047775268555


Processing epoch 01:   6%|▌         | 301/4850 [01:25<21:00,  3.61it/s]

Epoch: 2, Loss: 2.589996099472046


Processing epoch 01:   6%|▌         | 302/4850 [01:26<21:10,  3.58it/s]

Epoch: 2, Loss: 3.680553436279297


Processing epoch 01:   6%|▌         | 303/4850 [01:26<21:09,  3.58it/s]

Epoch: 2, Loss: 3.1468875408172607


Processing epoch 01:   6%|▋         | 304/4850 [01:26<21:09,  3.58it/s]

Epoch: 2, Loss: 2.9445009231567383


Processing epoch 01:   6%|▋         | 305/4850 [01:26<21:13,  3.57it/s]

Epoch: 2, Loss: 2.7397279739379883


Processing epoch 01:   6%|▋         | 306/4850 [01:27<21:17,  3.56it/s]

Epoch: 2, Loss: 2.699619770050049


Processing epoch 01:   6%|▋         | 307/4850 [01:27<21:48,  3.47it/s]

Epoch: 2, Loss: 2.4411191940307617


Processing epoch 01:   6%|▋         | 308/4850 [01:27<22:05,  3.43it/s]

Epoch: 2, Loss: 2.7552781105041504


Processing epoch 01:   6%|▋         | 309/4850 [01:28<22:22,  3.38it/s]

Epoch: 2, Loss: 2.89041805267334


Processing epoch 01:   6%|▋         | 310/4850 [01:28<22:34,  3.35it/s]

Epoch: 2, Loss: 3.228334903717041


Processing epoch 01:   6%|▋         | 311/4850 [01:28<22:21,  3.38it/s]

Epoch: 2, Loss: 2.4920759201049805


Processing epoch 01:   6%|▋         | 312/4850 [01:29<22:36,  3.35it/s]

Epoch: 2, Loss: 3.063533306121826


Processing epoch 01:   6%|▋         | 313/4850 [01:29<22:40,  3.34it/s]

Epoch: 2, Loss: 2.6107399463653564


Processing epoch 01:   6%|▋         | 314/4850 [01:29<22:34,  3.35it/s]

Epoch: 2, Loss: 2.9426145553588867


Processing epoch 01:   6%|▋         | 315/4850 [01:29<22:36,  3.34it/s]

Epoch: 2, Loss: 2.557624578475952


Processing epoch 01:   7%|▋         | 316/4850 [01:30<22:55,  3.30it/s]

Epoch: 2, Loss: 3.1621246337890625


Processing epoch 01:   7%|▋         | 317/4850 [01:30<22:51,  3.31it/s]

Epoch: 2, Loss: 2.5629630088806152


Processing epoch 01:   7%|▋         | 318/4850 [01:30<22:57,  3.29it/s]

Epoch: 2, Loss: 2.3864498138427734


Processing epoch 01:   7%|▋         | 319/4850 [01:31<22:57,  3.29it/s]

Epoch: 2, Loss: 2.5438389778137207


Processing epoch 01:   7%|▋         | 320/4850 [01:31<22:29,  3.36it/s]

Epoch: 2, Loss: 3.1530215740203857


Processing epoch 01:   7%|▋         | 321/4850 [01:31<22:11,  3.40it/s]

Epoch: 2, Loss: 2.739422559738159


Processing epoch 01:   7%|▋         | 322/4850 [01:32<21:49,  3.46it/s]

Epoch: 2, Loss: 2.795701503753662


Processing epoch 01:   7%|▋         | 323/4850 [01:32<21:41,  3.48it/s]

Epoch: 2, Loss: 2.9439563751220703


Processing epoch 01:   7%|▋         | 324/4850 [01:32<21:35,  3.49it/s]

Epoch: 2, Loss: 3.1931939125061035


Processing epoch 01:   7%|▋         | 325/4850 [01:32<21:23,  3.52it/s]

Epoch: 2, Loss: 2.697629690170288


Processing epoch 01:   7%|▋         | 326/4850 [01:33<21:22,  3.53it/s]

Epoch: 2, Loss: 2.949524402618408


Processing epoch 01:   7%|▋         | 327/4850 [01:33<21:19,  3.54it/s]

Epoch: 2, Loss: 3.152669668197632


Processing epoch 01:   7%|▋         | 328/4850 [01:33<21:16,  3.54it/s]

Epoch: 2, Loss: 2.211357593536377


Processing epoch 01:   7%|▋         | 329/4850 [01:33<21:27,  3.51it/s]

Epoch: 2, Loss: 2.9658865928649902


Processing epoch 01:   7%|▋         | 330/4850 [01:34<21:42,  3.47it/s]

Epoch: 2, Loss: 3.4159657955169678


Processing epoch 01:   7%|▋         | 331/4850 [01:34<21:30,  3.50it/s]

Epoch: 2, Loss: 2.2459683418273926


Processing epoch 01:   7%|▋         | 332/4850 [01:34<21:21,  3.53it/s]

Epoch: 2, Loss: 2.515397548675537


Processing epoch 01:   7%|▋         | 333/4850 [01:35<21:12,  3.55it/s]

Epoch: 2, Loss: 3.3475699424743652


Processing epoch 01:   7%|▋         | 334/4850 [01:35<21:18,  3.53it/s]

Epoch: 2, Loss: 2.718670606613159


Processing epoch 01:   7%|▋         | 335/4850 [01:35<21:10,  3.55it/s]

Epoch: 2, Loss: 2.1453561782836914


Processing epoch 01:   7%|▋         | 336/4850 [01:35<21:09,  3.56it/s]

Epoch: 2, Loss: 2.40938663482666


Processing epoch 01:   7%|▋         | 337/4850 [01:36<21:07,  3.56it/s]

Epoch: 2, Loss: 2.7298805713653564


Processing epoch 01:   7%|▋         | 338/4850 [01:36<21:06,  3.56it/s]

Epoch: 2, Loss: 2.714280128479004


Processing epoch 01:   7%|▋         | 339/4850 [01:36<20:59,  3.58it/s]

Epoch: 2, Loss: 3.1313390731811523


Processing epoch 01:   7%|▋         | 340/4850 [01:37<20:56,  3.59it/s]

Epoch: 2, Loss: 3.062981605529785


Processing epoch 01:   7%|▋         | 341/4850 [01:37<20:49,  3.61it/s]

Epoch: 2, Loss: 2.99613618850708


Processing epoch 01:   7%|▋         | 342/4850 [01:37<20:51,  3.60it/s]

Epoch: 2, Loss: 3.396918296813965


Processing epoch 01:   7%|▋         | 343/4850 [01:37<20:52,  3.60it/s]

Epoch: 2, Loss: 2.6716699600219727


Processing epoch 01:   7%|▋         | 344/4850 [01:38<20:52,  3.60it/s]

Epoch: 2, Loss: 2.988943099975586


Processing epoch 01:   7%|▋         | 345/4850 [01:38<20:50,  3.60it/s]

Epoch: 2, Loss: 2.543867588043213


Processing epoch 01:   7%|▋         | 346/4850 [01:38<20:49,  3.61it/s]

Epoch: 2, Loss: 3.236696243286133


Processing epoch 01:   7%|▋         | 347/4850 [01:39<20:50,  3.60it/s]

Epoch: 2, Loss: 3.335362434387207


Processing epoch 01:   7%|▋         | 348/4850 [01:39<20:55,  3.59it/s]

Epoch: 2, Loss: 2.1586523056030273


Processing epoch 01:   7%|▋         | 349/4850 [01:39<21:04,  3.56it/s]

Epoch: 2, Loss: 2.895359516143799


Processing epoch 01:   7%|▋         | 350/4850 [01:39<20:59,  3.57it/s]

Epoch: 2, Loss: 2.542959213256836


Processing epoch 01:   7%|▋         | 351/4850 [01:40<20:56,  3.58it/s]

Epoch: 2, Loss: 2.871464729309082


Processing epoch 01:   7%|▋         | 352/4850 [01:40<20:55,  3.58it/s]

Epoch: 2, Loss: 3.450000286102295


Processing epoch 01:   7%|▋         | 353/4850 [01:40<21:01,  3.56it/s]

Epoch: 2, Loss: 3.087100028991699


Processing epoch 01:   7%|▋         | 354/4850 [01:40<21:00,  3.57it/s]

Epoch: 2, Loss: 2.8084726333618164


Processing epoch 01:   7%|▋         | 355/4850 [01:41<21:11,  3.54it/s]

Epoch: 2, Loss: 2.6563327312469482


Processing epoch 01:   7%|▋         | 356/4850 [01:41<21:25,  3.50it/s]

Epoch: 2, Loss: 3.8519115447998047


Processing epoch 01:   7%|▋         | 357/4850 [01:41<21:42,  3.45it/s]

Epoch: 2, Loss: 3.372490644454956


Processing epoch 01:   7%|▋         | 358/4850 [01:42<21:31,  3.48it/s]

Epoch: 2, Loss: 3.0509958267211914


Processing epoch 01:   7%|▋         | 359/4850 [01:42<21:29,  3.48it/s]

Epoch: 2, Loss: 2.5845203399658203


Processing epoch 01:   7%|▋         | 360/4850 [01:42<21:48,  3.43it/s]

Epoch: 2, Loss: 3.03770112991333


Processing epoch 01:   7%|▋         | 361/4850 [01:43<22:11,  3.37it/s]

Epoch: 2, Loss: 2.2750229835510254


Processing epoch 01:   7%|▋         | 362/4850 [01:43<21:50,  3.43it/s]

Epoch: 2, Loss: 2.744349956512451


Processing epoch 01:   7%|▋         | 363/4850 [01:43<22:14,  3.36it/s]

Epoch: 2, Loss: 2.2378807067871094


Processing epoch 01:   8%|▊         | 364/4850 [01:43<22:30,  3.32it/s]

Epoch: 2, Loss: 2.928865432739258


Processing epoch 01:   8%|▊         | 365/4850 [01:44<22:19,  3.35it/s]

Epoch: 2, Loss: 2.8237860202789307


Processing epoch 01:   8%|▊         | 366/4850 [01:44<22:26,  3.33it/s]

Epoch: 2, Loss: 2.942439317703247


Processing epoch 01:   8%|▊         | 367/4850 [01:44<22:23,  3.34it/s]

Epoch: 2, Loss: 2.480283737182617


Processing epoch 01:   8%|▊         | 368/4850 [01:45<22:30,  3.32it/s]

Epoch: 2, Loss: 2.4534153938293457


Processing epoch 01:   8%|▊         | 369/4850 [01:45<21:58,  3.40it/s]

Epoch: 2, Loss: 3.1146278381347656


Processing epoch 01:   8%|▊         | 370/4850 [01:45<21:41,  3.44it/s]

Epoch: 2, Loss: 2.9596939086914062


Processing epoch 01:   8%|▊         | 371/4850 [01:45<21:29,  3.47it/s]

Epoch: 2, Loss: 3.2257890701293945


Processing epoch 01:   8%|▊         | 372/4850 [01:46<21:16,  3.51it/s]

Epoch: 2, Loss: 2.3483715057373047


Processing epoch 01:   8%|▊         | 373/4850 [01:46<21:05,  3.54it/s]

Epoch: 2, Loss: 2.5677695274353027


Processing epoch 01:   8%|▊         | 374/4850 [01:46<21:02,  3.55it/s]

Epoch: 2, Loss: 2.8018596172332764


Processing epoch 01:   8%|▊         | 375/4850 [01:47<20:58,  3.56it/s]

Epoch: 2, Loss: 2.671595573425293


Processing epoch 01:   8%|▊         | 376/4850 [01:47<21:01,  3.55it/s]

Epoch: 2, Loss: 2.895026445388794


Processing epoch 01:   8%|▊         | 377/4850 [01:47<21:00,  3.55it/s]

Epoch: 2, Loss: 2.4268155097961426


Processing epoch 01:   8%|▊         | 378/4850 [01:47<20:59,  3.55it/s]

Epoch: 2, Loss: 2.324796676635742


Processing epoch 01:   8%|▊         | 379/4850 [01:48<21:00,  3.55it/s]

Epoch: 2, Loss: 2.4471797943115234


Processing epoch 01:   8%|▊         | 380/4850 [01:48<20:57,  3.56it/s]

Epoch: 2, Loss: 2.278419017791748


Processing epoch 01:   8%|▊         | 381/4850 [01:48<20:55,  3.56it/s]

Epoch: 2, Loss: 2.770406484603882


Processing epoch 01:   8%|▊         | 382/4850 [01:49<20:50,  3.57it/s]

Epoch: 2, Loss: 2.5488595962524414


Processing epoch 01:   8%|▊         | 383/4850 [01:49<20:48,  3.58it/s]

Epoch: 2, Loss: 2.484806537628174


Processing epoch 01:   8%|▊         | 384/4850 [01:49<20:56,  3.55it/s]

Epoch: 2, Loss: 2.1827290058135986


Processing epoch 01:   8%|▊         | 385/4850 [01:49<21:02,  3.54it/s]

Epoch: 2, Loss: 2.820674180984497


Processing epoch 01:   8%|▊         | 386/4850 [01:50<20:58,  3.55it/s]

Epoch: 2, Loss: 2.6516001224517822


Processing epoch 01:   8%|▊         | 387/4850 [01:50<20:48,  3.58it/s]

Epoch: 2, Loss: 2.6629533767700195


Processing epoch 01:   8%|▊         | 388/4850 [01:50<20:47,  3.58it/s]

Epoch: 2, Loss: 2.869051218032837


Processing epoch 01:   8%|▊         | 389/4850 [01:51<20:45,  3.58it/s]

Epoch: 2, Loss: 3.4446797370910645


Processing epoch 01:   8%|▊         | 390/4850 [01:51<20:46,  3.58it/s]

Epoch: 2, Loss: 2.9114737510681152


Processing epoch 01:   8%|▊         | 391/4850 [01:51<20:48,  3.57it/s]

Epoch: 2, Loss: 2.473579168319702


Processing epoch 01:   8%|▊         | 392/4850 [01:51<20:49,  3.57it/s]

Epoch: 2, Loss: 2.4771223068237305


Processing epoch 01:   8%|▊         | 393/4850 [01:52<20:50,  3.56it/s]

Epoch: 2, Loss: 2.4398064613342285


Processing epoch 01:   8%|▊         | 394/4850 [01:52<20:49,  3.57it/s]

Epoch: 2, Loss: 2.399679183959961


Processing epoch 01:   8%|▊         | 395/4850 [01:52<20:47,  3.57it/s]

Epoch: 2, Loss: 2.738708972930908


Processing epoch 01:   8%|▊         | 396/4850 [01:52<20:49,  3.56it/s]

Epoch: 2, Loss: 2.954441785812378


Processing epoch 01:   8%|▊         | 397/4850 [01:53<20:46,  3.57it/s]

Epoch: 2, Loss: 2.902895450592041


Processing epoch 01:   8%|▊         | 398/4850 [01:53<20:43,  3.58it/s]

Epoch: 2, Loss: 2.681097984313965


Processing epoch 01:   8%|▊         | 399/4850 [01:53<20:50,  3.56it/s]

Epoch: 2, Loss: 2.560244083404541


Processing epoch 01:   8%|▊         | 400/4850 [01:54<20:54,  3.55it/s]

Epoch: 2, Loss: 2.7926275730133057


Processing epoch 01:   8%|▊         | 401/4850 [01:54<20:56,  3.54it/s]

Epoch: 2, Loss: 3.0657825469970703


Processing epoch 01:   8%|▊         | 402/4850 [01:54<21:00,  3.53it/s]

Epoch: 2, Loss: 2.8674490451812744


Processing epoch 01:   8%|▊         | 403/4850 [01:54<20:56,  3.54it/s]

Epoch: 2, Loss: 3.0799639225006104


Processing epoch 01:   8%|▊         | 404/4850 [01:55<20:48,  3.56it/s]

Epoch: 2, Loss: 2.893984794616699


Processing epoch 01:   8%|▊         | 405/4850 [01:55<21:06,  3.51it/s]

Epoch: 2, Loss: 2.5792737007141113


Processing epoch 01:   8%|▊         | 406/4850 [01:55<20:55,  3.54it/s]

Epoch: 2, Loss: 3.32503604888916


Processing epoch 01:   8%|▊         | 407/4850 [01:56<21:03,  3.52it/s]

Epoch: 2, Loss: 2.761343002319336


Processing epoch 01:   8%|▊         | 408/4850 [01:56<21:32,  3.44it/s]

Epoch: 2, Loss: 2.611873149871826


Processing epoch 01:   8%|▊         | 409/4850 [01:56<21:45,  3.40it/s]

Epoch: 2, Loss: 2.7436161041259766


Processing epoch 01:   8%|▊         | 410/4850 [01:57<22:10,  3.34it/s]

Epoch: 2, Loss: 2.137503147125244


Processing epoch 01:   8%|▊         | 411/4850 [01:57<22:18,  3.32it/s]

Epoch: 2, Loss: 2.9235239028930664


Processing epoch 01:   8%|▊         | 412/4850 [01:57<22:15,  3.32it/s]

Epoch: 2, Loss: 2.2845699787139893


Processing epoch 01:   9%|▊         | 413/4850 [01:57<21:58,  3.37it/s]

Epoch: 2, Loss: 3.083533763885498


Processing epoch 01:   9%|▊         | 414/4850 [01:58<21:51,  3.38it/s]

Epoch: 2, Loss: 2.7153701782226562


Processing epoch 01:   9%|▊         | 415/4850 [01:58<21:52,  3.38it/s]

Epoch: 2, Loss: 2.566878318786621


Processing epoch 01:   9%|▊         | 416/4850 [01:58<21:50,  3.38it/s]

Epoch: 2, Loss: 2.9517955780029297


Processing epoch 01:   9%|▊         | 417/4850 [01:59<21:54,  3.37it/s]

Epoch: 2, Loss: 3.4110193252563477


Processing epoch 01:   9%|▊         | 418/4850 [01:59<21:42,  3.40it/s]

Epoch: 2, Loss: 2.545773983001709


Processing epoch 01:   9%|▊         | 419/4850 [01:59<21:23,  3.45it/s]

Epoch: 2, Loss: 2.281785488128662


Processing epoch 01:   9%|▊         | 420/4850 [01:59<21:09,  3.49it/s]

Epoch: 2, Loss: 2.433182716369629


Processing epoch 01:   9%|▊         | 421/4850 [02:00<20:59,  3.52it/s]

Epoch: 2, Loss: 3.167701005935669


Processing epoch 01:   9%|▊         | 422/4850 [02:00<20:42,  3.56it/s]

Epoch: 2, Loss: 3.357161521911621


Processing epoch 01:   9%|▊         | 423/4850 [02:00<20:40,  3.57it/s]

Epoch: 2, Loss: 2.107588052749634


Processing epoch 01:   9%|▊         | 424/4850 [02:01<20:48,  3.54it/s]

Epoch: 2, Loss: 3.0337538719177246


Processing epoch 01:   9%|▉         | 425/4850 [02:01<20:56,  3.52it/s]

Epoch: 2, Loss: 3.1856307983398438


Processing epoch 01:   9%|▉         | 426/4850 [02:01<20:50,  3.54it/s]

Epoch: 2, Loss: 2.632664680480957


Processing epoch 01:   9%|▉         | 427/4850 [02:01<20:50,  3.54it/s]

Epoch: 2, Loss: 2.2030327320098877


Processing epoch 01:   9%|▉         | 428/4850 [02:02<20:53,  3.53it/s]

Epoch: 2, Loss: 2.8615000247955322


Processing epoch 01:   9%|▉         | 429/4850 [02:02<20:56,  3.52it/s]

Epoch: 2, Loss: 2.154468059539795


Processing epoch 01:   9%|▉         | 430/4850 [02:02<20:54,  3.52it/s]

Epoch: 2, Loss: 2.572885036468506


Processing epoch 01:   9%|▉         | 431/4850 [02:03<20:51,  3.53it/s]

Epoch: 2, Loss: 2.4117794036865234


Processing epoch 01:   9%|▉         | 432/4850 [02:03<20:49,  3.54it/s]

Epoch: 2, Loss: 3.363159656524658


Processing epoch 01:   9%|▉         | 433/4850 [02:03<20:44,  3.55it/s]

Epoch: 2, Loss: 2.523472785949707


Processing epoch 01:   9%|▉         | 434/4850 [02:03<20:43,  3.55it/s]

Epoch: 2, Loss: 3.347780466079712


Processing epoch 01:   9%|▉         | 435/4850 [02:04<20:39,  3.56it/s]

Epoch: 2, Loss: 2.4740114212036133


Processing epoch 01:   9%|▉         | 436/4850 [02:04<20:54,  3.52it/s]

Epoch: 2, Loss: 3.319697380065918


Processing epoch 01:   9%|▉         | 437/4850 [02:04<20:53,  3.52it/s]

Epoch: 2, Loss: 2.875589370727539


Processing epoch 01:   9%|▉         | 438/4850 [02:05<20:53,  3.52it/s]

Epoch: 2, Loss: 1.9562246799468994


Processing epoch 01:   9%|▉         | 439/4850 [02:05<20:44,  3.54it/s]

Epoch: 2, Loss: 3.742292881011963


Processing epoch 01:   9%|▉         | 440/4850 [02:05<20:41,  3.55it/s]

Epoch: 2, Loss: 2.7331833839416504


Processing epoch 01:   9%|▉         | 441/4850 [02:05<20:34,  3.57it/s]

Epoch: 2, Loss: 2.6250011920928955


Processing epoch 01:   9%|▉         | 442/4850 [02:06<20:31,  3.58it/s]

Epoch: 2, Loss: 2.618680953979492


Processing epoch 01:   9%|▉         | 443/4850 [02:06<20:37,  3.56it/s]

Epoch: 2, Loss: 2.5865044593811035


Processing epoch 01:   9%|▉         | 444/4850 [02:06<20:39,  3.55it/s]

Epoch: 2, Loss: 2.4796178340911865


Processing epoch 01:   9%|▉         | 445/4850 [02:06<20:39,  3.55it/s]

Epoch: 2, Loss: 2.8461830615997314


Processing epoch 01:   9%|▉         | 446/4850 [02:07<20:40,  3.55it/s]

Epoch: 2, Loss: 2.313840866088867


Processing epoch 01:   9%|▉         | 447/4850 [02:07<20:48,  3.53it/s]

Epoch: 2, Loss: 2.36586332321167


Processing epoch 01:   9%|▉         | 448/4850 [02:07<20:44,  3.54it/s]

Epoch: 2, Loss: 3.044678211212158


Processing epoch 01:   9%|▉         | 449/4850 [02:08<20:49,  3.52it/s]

Epoch: 2, Loss: 2.477415084838867


Processing epoch 01:   9%|▉         | 450/4850 [02:08<20:55,  3.50it/s]

Epoch: 2, Loss: 2.441807270050049


Processing epoch 01:   9%|▉         | 451/4850 [02:08<20:45,  3.53it/s]

Epoch: 2, Loss: 3.789212226867676


Processing epoch 01:   9%|▉         | 452/4850 [02:08<20:45,  3.53it/s]

Epoch: 2, Loss: 3.0062851905822754


Processing epoch 01:   9%|▉         | 453/4850 [02:09<20:46,  3.53it/s]

Epoch: 2, Loss: 2.5052735805511475


Processing epoch 01:   9%|▉         | 454/4850 [02:09<20:52,  3.51it/s]

Epoch: 2, Loss: 3.598801612854004


Processing epoch 01:   9%|▉         | 455/4850 [02:09<20:52,  3.51it/s]

Epoch: 2, Loss: 2.504739284515381


Processing epoch 01:   9%|▉         | 456/4850 [02:10<21:07,  3.47it/s]

Epoch: 2, Loss: 2.7147088050842285


Processing epoch 01:   9%|▉         | 457/4850 [02:10<21:01,  3.48it/s]

Epoch: 2, Loss: 3.2491836547851562


Processing epoch 01:   9%|▉         | 458/4850 [02:10<21:14,  3.44it/s]

Epoch: 2, Loss: 4.700453758239746


Processing epoch 01:   9%|▉         | 459/4850 [02:11<21:38,  3.38it/s]

Epoch: 2, Loss: 3.3164801597595215


Processing epoch 01:   9%|▉         | 460/4850 [02:11<21:42,  3.37it/s]

Epoch: 2, Loss: 3.0855071544647217


Processing epoch 01:  10%|▉         | 461/4850 [02:11<21:34,  3.39it/s]

Epoch: 2, Loss: 2.6845197677612305


Processing epoch 01:  10%|▉         | 462/4850 [02:11<21:37,  3.38it/s]

Epoch: 2, Loss: 2.3458714485168457


Processing epoch 01:  10%|▉         | 463/4850 [02:12<22:00,  3.32it/s]

Epoch: 2, Loss: 3.057936668395996


Processing epoch 01:  10%|▉         | 464/4850 [02:12<21:55,  3.33it/s]

Epoch: 2, Loss: 3.5131218433380127


Processing epoch 01:  10%|▉         | 465/4850 [02:12<21:57,  3.33it/s]

Epoch: 2, Loss: 2.8803324699401855


Processing epoch 01:  10%|▉         | 466/4850 [02:13<21:30,  3.40it/s]

Epoch: 2, Loss: 2.9780516624450684


Processing epoch 01:  10%|▉         | 467/4850 [02:13<21:08,  3.46it/s]

Epoch: 2, Loss: 2.7964563369750977


Processing epoch 01:  10%|▉         | 468/4850 [02:13<20:59,  3.48it/s]

Epoch: 2, Loss: 2.2561604976654053


Processing epoch 01:  10%|▉         | 469/4850 [02:13<20:48,  3.51it/s]

Epoch: 2, Loss: 4.067252159118652


Processing epoch 01:  10%|▉         | 470/4850 [02:14<20:40,  3.53it/s]

Epoch: 2, Loss: 2.847036361694336


Processing epoch 01:  10%|▉         | 471/4850 [02:14<20:35,  3.54it/s]

Epoch: 2, Loss: 2.8228461742401123


Processing epoch 01:  10%|▉         | 472/4850 [02:14<20:31,  3.55it/s]

Epoch: 2, Loss: 3.8591904640197754


Processing epoch 01:  10%|▉         | 473/4850 [02:15<20:34,  3.55it/s]

Epoch: 2, Loss: 2.51595401763916


Processing epoch 01:  10%|▉         | 474/4850 [02:15<20:30,  3.56it/s]

Epoch: 2, Loss: 3.037198066711426


Processing epoch 01:  10%|▉         | 475/4850 [02:15<20:27,  3.56it/s]

Epoch: 2, Loss: 2.435401439666748


Processing epoch 01:  10%|▉         | 476/4850 [02:15<20:24,  3.57it/s]

Epoch: 2, Loss: 3.050659418106079


Processing epoch 01:  10%|▉         | 477/4850 [02:16<20:28,  3.56it/s]

Epoch: 2, Loss: 2.7837820053100586


Processing epoch 01:  10%|▉         | 478/4850 [02:16<20:27,  3.56it/s]

Epoch: 2, Loss: 3.298473834991455


Processing epoch 01:  10%|▉         | 479/4850 [02:16<20:30,  3.55it/s]

Epoch: 2, Loss: 2.774362087249756


Processing epoch 01:  10%|▉         | 480/4850 [02:17<20:25,  3.57it/s]

Epoch: 2, Loss: 3.213524341583252


Processing epoch 01:  10%|▉         | 481/4850 [02:17<20:30,  3.55it/s]

Epoch: 2, Loss: 3.2791528701782227


Processing epoch 01:  10%|▉         | 482/4850 [02:17<20:26,  3.56it/s]

Epoch: 2, Loss: 2.206728219985962


Processing epoch 01:  10%|▉         | 483/4850 [02:17<20:33,  3.54it/s]

Epoch: 2, Loss: 2.6027655601501465


Processing epoch 01:  10%|▉         | 484/4850 [02:18<20:29,  3.55it/s]

Epoch: 2, Loss: 2.6131770610809326


Processing epoch 01:  10%|█         | 485/4850 [02:18<20:23,  3.57it/s]

Epoch: 2, Loss: 3.333270788192749


Processing epoch 01:  10%|█         | 486/4850 [02:18<20:26,  3.56it/s]

Epoch: 2, Loss: 3.3150296211242676


Processing epoch 01:  10%|█         | 487/4850 [02:19<20:26,  3.56it/s]

Epoch: 2, Loss: 2.5692214965820312


Processing epoch 01:  10%|█         | 488/4850 [02:19<20:28,  3.55it/s]

Epoch: 2, Loss: 3.1895053386688232


Processing epoch 01:  10%|█         | 489/4850 [02:19<20:28,  3.55it/s]

Epoch: 2, Loss: 2.491819381713867


Processing epoch 01:  10%|█         | 490/4850 [02:19<20:24,  3.56it/s]

Epoch: 2, Loss: 2.857490062713623


Processing epoch 01:  10%|█         | 491/4850 [02:20<20:19,  3.57it/s]

Epoch: 2, Loss: 3.216123104095459


Processing epoch 01:  10%|█         | 492/4850 [02:20<20:17,  3.58it/s]

Epoch: 2, Loss: 2.5238773822784424


Processing epoch 01:  10%|█         | 493/4850 [02:20<20:21,  3.57it/s]

Epoch: 2, Loss: 3.0562872886657715


Processing epoch 01:  10%|█         | 494/4850 [02:20<20:22,  3.56it/s]

Epoch: 2, Loss: 3.006387233734131


Processing epoch 01:  10%|█         | 495/4850 [02:21<20:17,  3.58it/s]

Epoch: 2, Loss: 4.083798408508301


Processing epoch 01:  10%|█         | 496/4850 [02:21<20:16,  3.58it/s]

Epoch: 2, Loss: 2.4108943939208984


Processing epoch 01:  10%|█         | 497/4850 [02:21<20:22,  3.56it/s]

Epoch: 2, Loss: 2.5661377906799316


Processing epoch 01:  10%|█         | 498/4850 [02:22<20:31,  3.53it/s]

Epoch: 2, Loss: 2.7307119369506836


Processing epoch 01:  10%|█         | 499/4850 [02:22<20:30,  3.54it/s]

Epoch: 2, Loss: 3.013343334197998


Processing epoch 01:  10%|█         | 500/4850 [02:22<20:21,  3.56it/s]

Epoch: 2, Loss: 3.1947083473205566


Processing epoch 01:  10%|█         | 501/4850 [02:22<20:19,  3.57it/s]

Epoch: 2, Loss: 2.9127373695373535


Processing epoch 01:  10%|█         | 502/4850 [02:23<20:48,  3.48it/s]

Epoch: 2, Loss: 3.0924363136291504


Processing epoch 01:  10%|█         | 503/4850 [02:23<20:47,  3.49it/s]

Epoch: 2, Loss: 3.094841480255127


Processing epoch 01:  10%|█         | 504/4850 [02:23<21:04,  3.44it/s]

Epoch: 2, Loss: 2.6878085136413574


Processing epoch 01:  10%|█         | 505/4850 [02:24<21:25,  3.38it/s]

Epoch: 2, Loss: 2.7164831161499023


Processing epoch 01:  10%|█         | 506/4850 [02:24<21:08,  3.42it/s]

Epoch: 2, Loss: 2.8988852500915527


Processing epoch 01:  10%|█         | 507/4850 [02:24<21:03,  3.44it/s]

Epoch: 2, Loss: 4.435296058654785


Processing epoch 01:  10%|█         | 508/4850 [02:25<21:14,  3.41it/s]

Epoch: 2, Loss: 2.7802414894104004


Processing epoch 01:  10%|█         | 509/4850 [02:25<21:24,  3.38it/s]

Epoch: 2, Loss: 2.915511131286621


Processing epoch 01:  11%|█         | 510/4850 [02:25<21:21,  3.39it/s]

Epoch: 2, Loss: 2.456939697265625


Processing epoch 01:  11%|█         | 511/4850 [02:25<21:11,  3.41it/s]

Epoch: 2, Loss: 2.3046932220458984


Processing epoch 01:  11%|█         | 512/4850 [02:26<21:04,  3.43it/s]

Epoch: 2, Loss: 2.5312435626983643


Processing epoch 01:  11%|█         | 513/4850 [02:26<21:11,  3.41it/s]

Epoch: 2, Loss: 2.362651824951172


Processing epoch 01:  11%|█         | 514/4850 [02:26<21:01,  3.44it/s]

Epoch: 2, Loss: 3.4988861083984375


Processing epoch 01:  11%|█         | 515/4850 [02:27<20:58,  3.45it/s]

Epoch: 2, Loss: 2.9899916648864746


Processing epoch 01:  11%|█         | 516/4850 [02:27<20:44,  3.48it/s]

Epoch: 2, Loss: 2.362834930419922


Processing epoch 01:  11%|█         | 517/4850 [02:27<20:32,  3.52it/s]

Epoch: 2, Loss: 2.202366590499878


Processing epoch 01:  11%|█         | 518/4850 [02:27<20:20,  3.55it/s]

Epoch: 2, Loss: 3.433986186981201


Processing epoch 01:  11%|█         | 519/4850 [02:28<20:16,  3.56it/s]

Epoch: 2, Loss: 2.6013872623443604


Processing epoch 01:  11%|█         | 520/4850 [02:28<20:17,  3.56it/s]

Epoch: 2, Loss: 2.6093103885650635


Processing epoch 01:  11%|█         | 521/4850 [02:28<20:15,  3.56it/s]

Epoch: 2, Loss: 2.788498640060425


Processing epoch 01:  11%|█         | 522/4850 [02:28<20:07,  3.58it/s]

Epoch: 2, Loss: 2.9763448238372803


Processing epoch 01:  11%|█         | 523/4850 [02:29<20:03,  3.59it/s]

Epoch: 2, Loss: 4.3186163902282715


Processing epoch 01:  11%|█         | 524/4850 [02:29<20:10,  3.57it/s]

Epoch: 2, Loss: 3.0254688262939453


Processing epoch 01:  11%|█         | 525/4850 [02:29<20:05,  3.59it/s]

Epoch: 2, Loss: 2.9078006744384766


Processing epoch 01:  11%|█         | 526/4850 [02:30<20:08,  3.58it/s]

Epoch: 2, Loss: 2.7602314949035645


Processing epoch 01:  11%|█         | 527/4850 [02:30<20:11,  3.57it/s]

Epoch: 2, Loss: 2.939502000808716


Processing epoch 01:  11%|█         | 528/4850 [02:30<20:10,  3.57it/s]

Epoch: 2, Loss: 2.177478551864624


Processing epoch 01:  11%|█         | 529/4850 [02:30<20:04,  3.59it/s]

Epoch: 2, Loss: 2.669778823852539


Processing epoch 01:  11%|█         | 530/4850 [02:31<20:05,  3.58it/s]

Epoch: 2, Loss: 2.9461112022399902


Processing epoch 01:  11%|█         | 531/4850 [02:31<20:10,  3.57it/s]

Epoch: 2, Loss: 2.149258852005005


Processing epoch 01:  11%|█         | 532/4850 [02:31<20:04,  3.58it/s]

Epoch: 2, Loss: 2.2404427528381348


Processing epoch 01:  11%|█         | 533/4850 [02:32<20:08,  3.57it/s]

Epoch: 2, Loss: 3.002194881439209


Processing epoch 01:  11%|█         | 534/4850 [02:32<20:08,  3.57it/s]

Epoch: 2, Loss: 3.3398003578186035


Processing epoch 01:  11%|█         | 535/4850 [02:32<20:02,  3.59it/s]

Epoch: 2, Loss: 2.8003554344177246


Processing epoch 01:  11%|█         | 536/4850 [02:32<19:59,  3.60it/s]

Epoch: 2, Loss: 3.401729106903076


Processing epoch 01:  11%|█         | 537/4850 [02:33<19:58,  3.60it/s]

Epoch: 2, Loss: 2.834317445755005


Processing epoch 01:  11%|█         | 538/4850 [02:33<20:14,  3.55it/s]

Epoch: 2, Loss: 2.973750114440918


Processing epoch 01:  11%|█         | 539/4850 [02:33<20:13,  3.55it/s]

Epoch: 2, Loss: 2.4739749431610107


Processing epoch 01:  11%|█         | 540/4850 [02:34<20:11,  3.56it/s]

Epoch: 2, Loss: 2.7355194091796875


Processing epoch 01:  11%|█         | 541/4850 [02:34<20:07,  3.57it/s]

Epoch: 2, Loss: 2.877030611038208


Processing epoch 01:  11%|█         | 542/4850 [02:34<20:07,  3.57it/s]

Epoch: 2, Loss: 2.7549190521240234


Processing epoch 01:  11%|█         | 543/4850 [02:34<20:04,  3.57it/s]

Epoch: 2, Loss: 2.7064626216888428


Processing epoch 01:  11%|█         | 544/4850 [02:35<20:02,  3.58it/s]

Epoch: 2, Loss: 3.5503058433532715


Processing epoch 01:  11%|█         | 545/4850 [02:35<20:11,  3.55it/s]

Epoch: 2, Loss: 3.556593656539917


Processing epoch 01:  11%|█▏        | 546/4850 [02:35<20:06,  3.57it/s]

Epoch: 2, Loss: 3.7154674530029297


Processing epoch 01:  11%|█▏        | 547/4850 [02:35<19:59,  3.59it/s]

Epoch: 2, Loss: 3.2060413360595703


Processing epoch 01:  11%|█▏        | 548/4850 [02:36<20:15,  3.54it/s]

Epoch: 2, Loss: 1.9628727436065674


Processing epoch 01:  11%|█▏        | 549/4850 [02:36<20:32,  3.49it/s]

Epoch: 2, Loss: 2.371880054473877


Processing epoch 01:  11%|█▏        | 550/4850 [02:36<20:28,  3.50it/s]

Epoch: 2, Loss: 2.579282522201538


Processing epoch 01:  11%|█▏        | 551/4850 [02:37<20:45,  3.45it/s]

Epoch: 2, Loss: 2.6943695545196533


Processing epoch 01:  11%|█▏        | 552/4850 [02:37<21:06,  3.39it/s]

Epoch: 2, Loss: 2.6809141635894775


Processing epoch 01:  11%|█▏        | 553/4850 [02:37<20:57,  3.42it/s]

Epoch: 2, Loss: 2.5773911476135254


Processing epoch 01:  11%|█▏        | 554/4850 [02:38<21:03,  3.40it/s]

Epoch: 2, Loss: 2.282290458679199


Processing epoch 01:  11%|█▏        | 555/4850 [02:38<21:18,  3.36it/s]

Epoch: 2, Loss: 4.479580402374268


Processing epoch 01:  11%|█▏        | 556/4850 [02:38<21:25,  3.34it/s]

Epoch: 2, Loss: 2.61588978767395


Processing epoch 01:  11%|█▏        | 557/4850 [02:38<21:50,  3.27it/s]

Epoch: 2, Loss: 2.636387825012207


Processing epoch 01:  12%|█▏        | 558/4850 [02:39<21:38,  3.30it/s]

Epoch: 2, Loss: 2.4003138542175293


Processing epoch 01:  12%|█▏        | 559/4850 [02:39<21:33,  3.32it/s]

Epoch: 2, Loss: 2.3839385509490967


Processing epoch 01:  12%|█▏        | 560/4850 [02:39<21:19,  3.35it/s]

Epoch: 2, Loss: 2.751978874206543


Processing epoch 01:  12%|█▏        | 561/4850 [02:40<21:23,  3.34it/s]

Epoch: 2, Loss: 2.9426069259643555


Processing epoch 01:  12%|█▏        | 562/4850 [02:40<21:00,  3.40it/s]

Epoch: 2, Loss: 2.704911708831787


Processing epoch 01:  12%|█▏        | 563/4850 [02:40<21:06,  3.38it/s]

Epoch: 2, Loss: 2.7769696712493896


Processing epoch 01:  12%|█▏        | 564/4850 [02:41<21:27,  3.33it/s]

Epoch: 2, Loss: 3.5099992752075195


Processing epoch 01:  12%|█▏        | 565/4850 [02:41<20:59,  3.40it/s]

Epoch: 2, Loss: 2.671109676361084


Processing epoch 01:  12%|█▏        | 566/4850 [02:41<20:43,  3.44it/s]

Epoch: 2, Loss: 2.7435197830200195


Processing epoch 01:  12%|█▏        | 567/4850 [02:41<20:40,  3.45it/s]

Epoch: 2, Loss: 2.548569679260254


Processing epoch 01:  12%|█▏        | 568/4850 [02:42<20:26,  3.49it/s]

Epoch: 2, Loss: 3.232881546020508


Processing epoch 01:  12%|█▏        | 569/4850 [02:42<20:18,  3.51it/s]

Epoch: 2, Loss: 2.3793883323669434


Processing epoch 01:  12%|█▏        | 570/4850 [02:42<20:18,  3.51it/s]

Epoch: 2, Loss: 3.01887845993042


Processing epoch 01:  12%|█▏        | 571/4850 [02:43<20:16,  3.52it/s]

Epoch: 2, Loss: 2.2154159545898438


Processing epoch 01:  12%|█▏        | 572/4850 [02:43<20:10,  3.54it/s]

Epoch: 2, Loss: 2.5575051307678223


Processing epoch 01:  12%|█▏        | 573/4850 [02:43<20:08,  3.54it/s]

Epoch: 2, Loss: 2.334078788757324


Processing epoch 01:  12%|█▏        | 574/4850 [02:43<20:12,  3.53it/s]

Epoch: 2, Loss: 2.533857583999634


Processing epoch 01:  12%|█▏        | 575/4850 [02:44<20:04,  3.55it/s]

Epoch: 2, Loss: 2.4410624504089355


Processing epoch 01:  12%|█▏        | 576/4850 [02:44<19:57,  3.57it/s]

Epoch: 2, Loss: 2.858412504196167


Processing epoch 01:  12%|█▏        | 577/4850 [02:44<20:04,  3.55it/s]

Epoch: 2, Loss: 2.4641313552856445


Processing epoch 01:  12%|█▏        | 578/4850 [02:45<20:11,  3.53it/s]

Epoch: 2, Loss: 3.113173484802246


Processing epoch 01:  12%|█▏        | 579/4850 [02:45<20:04,  3.55it/s]

Epoch: 2, Loss: 2.990675926208496


Processing epoch 01:  12%|█▏        | 580/4850 [02:45<20:05,  3.54it/s]

Epoch: 2, Loss: 2.9574923515319824


Processing epoch 01:  12%|█▏        | 581/4850 [02:45<19:59,  3.56it/s]

Epoch: 2, Loss: 2.9334540367126465


Processing epoch 01:  12%|█▏        | 582/4850 [02:46<20:04,  3.54it/s]

Epoch: 2, Loss: 3.1953468322753906


Processing epoch 01:  12%|█▏        | 583/4850 [02:46<20:03,  3.55it/s]

Epoch: 2, Loss: 2.6593854427337646


Processing epoch 01:  12%|█▏        | 584/4850 [02:46<20:07,  3.53it/s]

Epoch: 2, Loss: 2.7103729248046875


Processing epoch 01:  12%|█▏        | 585/4850 [02:46<20:09,  3.53it/s]

Epoch: 2, Loss: 2.9101462364196777


Processing epoch 01:  12%|█▏        | 586/4850 [02:47<20:11,  3.52it/s]

Epoch: 2, Loss: 2.645470380783081


Processing epoch 01:  12%|█▏        | 587/4850 [02:47<20:07,  3.53it/s]

Epoch: 2, Loss: 2.526149034500122


Processing epoch 01:  12%|█▏        | 588/4850 [02:47<20:08,  3.53it/s]

Epoch: 2, Loss: 2.6283674240112305


Processing epoch 01:  12%|█▏        | 589/4850 [02:48<20:06,  3.53it/s]

Epoch: 2, Loss: 2.4543468952178955


Processing epoch 01:  12%|█▏        | 590/4850 [02:48<20:07,  3.53it/s]

Epoch: 2, Loss: 2.4427385330200195


Processing epoch 01:  12%|█▏        | 591/4850 [02:48<20:03,  3.54it/s]

Epoch: 2, Loss: 2.8045730590820312


Processing epoch 01:  12%|█▏        | 592/4850 [02:48<19:56,  3.56it/s]

Epoch: 2, Loss: 2.3233182430267334


Processing epoch 01:  12%|█▏        | 593/4850 [02:49<19:53,  3.57it/s]

Epoch: 2, Loss: 2.531075954437256


Processing epoch 01:  12%|█▏        | 594/4850 [02:49<19:51,  3.57it/s]

Epoch: 2, Loss: 2.421076774597168


Processing epoch 01:  12%|█▏        | 595/4850 [02:49<19:47,  3.58it/s]

Epoch: 2, Loss: 2.9893767833709717


Processing epoch 01:  12%|█▏        | 596/4850 [02:50<19:59,  3.55it/s]

Epoch: 2, Loss: 3.309346914291382


Processing epoch 01:  12%|█▏        | 597/4850 [02:50<20:02,  3.54it/s]

Epoch: 2, Loss: 3.1279118061065674


Processing epoch 01:  12%|█▏        | 598/4850 [02:50<19:56,  3.55it/s]

Epoch: 2, Loss: 2.536623954772949


Processing epoch 01:  12%|█▏        | 599/4850 [02:50<19:59,  3.54it/s]

Epoch: 2, Loss: 2.6924424171447754


Processing epoch 01:  12%|█▏        | 600/4850 [02:51<20:06,  3.52it/s]

Epoch: 2, Loss: 2.7744038105010986


Processing epoch 01:  12%|█▏        | 601/4850 [02:51<20:24,  3.47it/s]

Epoch: 2, Loss: 2.424839496612549


Processing epoch 01:  12%|█▏        | 602/4850 [02:51<20:28,  3.46it/s]

Epoch: 2, Loss: 2.994691848754883


Processing epoch 01:  12%|█▏        | 603/4850 [02:52<20:15,  3.49it/s]

Epoch: 2, Loss: 2.838489532470703


Processing epoch 01:  12%|█▏        | 604/4850 [02:52<20:42,  3.42it/s]

Epoch: 2, Loss: 2.64705228805542


Processing epoch 01:  12%|█▏        | 605/4850 [02:52<20:47,  3.40it/s]

Epoch: 2, Loss: 2.6174631118774414


Processing epoch 01:  12%|█▏        | 606/4850 [02:53<21:12,  3.34it/s]

Epoch: 2, Loss: 2.461287498474121


Processing epoch 01:  13%|█▎        | 607/4850 [02:53<21:19,  3.32it/s]

Epoch: 2, Loss: 2.856220006942749


Processing epoch 01:  13%|█▎        | 608/4850 [02:53<21:08,  3.34it/s]

Epoch: 2, Loss: 2.7598791122436523


Processing epoch 01:  13%|█▎        | 609/4850 [02:53<20:50,  3.39it/s]

Epoch: 2, Loss: 3.911184310913086


Processing epoch 01:  13%|█▎        | 610/4850 [02:54<21:07,  3.35it/s]

Epoch: 2, Loss: 2.9243619441986084


Processing epoch 01:  13%|█▎        | 611/4850 [02:54<21:19,  3.31it/s]

Epoch: 2, Loss: 2.46397066116333


Processing epoch 01:  13%|█▎        | 612/4850 [02:54<21:17,  3.32it/s]

Epoch: 2, Loss: 2.645216464996338


Processing epoch 01:  13%|█▎        | 613/4850 [02:55<20:58,  3.37it/s]

Epoch: 2, Loss: 2.6705586910247803


Processing epoch 01:  13%|█▎        | 614/4850 [02:55<20:51,  3.38it/s]

Epoch: 2, Loss: 2.3208789825439453


Processing epoch 01:  13%|█▎        | 615/4850 [02:55<20:31,  3.44it/s]

Epoch: 2, Loss: 3.762683391571045


Processing epoch 01:  13%|█▎        | 616/4850 [02:55<20:24,  3.46it/s]

Epoch: 2, Loss: 2.194079875946045


Processing epoch 01:  13%|█▎        | 617/4850 [02:56<20:15,  3.48it/s]

Epoch: 2, Loss: 2.699695348739624


Processing epoch 01:  13%|█▎        | 618/4850 [02:56<20:07,  3.50it/s]

Epoch: 2, Loss: 3.0623629093170166


Processing epoch 01:  13%|█▎        | 619/4850 [02:56<19:56,  3.54it/s]

Epoch: 2, Loss: 3.2706966400146484


Processing epoch 01:  13%|█▎        | 620/4850 [02:57<19:52,  3.55it/s]

Epoch: 2, Loss: 2.2902674674987793


Processing epoch 01:  13%|█▎        | 621/4850 [02:57<19:47,  3.56it/s]

Epoch: 2, Loss: 2.542633056640625


Processing epoch 01:  13%|█▎        | 622/4850 [02:57<19:49,  3.56it/s]

Epoch: 2, Loss: 2.887228488922119


Processing epoch 01:  13%|█▎        | 623/4850 [02:57<19:52,  3.55it/s]

Epoch: 2, Loss: 2.9087700843811035


Processing epoch 01:  13%|█▎        | 624/4850 [02:58<19:51,  3.55it/s]

Epoch: 2, Loss: 2.5436463356018066


Processing epoch 01:  13%|█▎        | 625/4850 [02:58<20:02,  3.51it/s]

Epoch: 2, Loss: 3.0741629600524902


Processing epoch 01:  13%|█▎        | 626/4850 [02:58<19:53,  3.54it/s]

Epoch: 2, Loss: 2.7816002368927


Processing epoch 01:  13%|█▎        | 627/4850 [02:59<19:51,  3.54it/s]

Epoch: 2, Loss: 2.8035900592803955


Processing epoch 01:  13%|█▎        | 628/4850 [02:59<19:46,  3.56it/s]

Epoch: 2, Loss: 3.2969422340393066


Processing epoch 01:  13%|█▎        | 629/4850 [02:59<19:55,  3.53it/s]

Epoch: 2, Loss: 2.7629292011260986


Processing epoch 01:  13%|█▎        | 630/4850 [02:59<19:56,  3.53it/s]

Epoch: 2, Loss: 2.9376237392425537


Processing epoch 01:  13%|█▎        | 631/4850 [03:00<19:52,  3.54it/s]

Epoch: 2, Loss: 2.7139902114868164


Processing epoch 01:  13%|█▎        | 632/4850 [03:00<19:50,  3.54it/s]

Epoch: 2, Loss: 2.9043822288513184


Processing epoch 01:  13%|█▎        | 633/4850 [03:00<19:52,  3.54it/s]

Epoch: 2, Loss: 1.9299452304840088


Processing epoch 01:  13%|█▎        | 634/4850 [03:01<19:47,  3.55it/s]

Epoch: 2, Loss: 2.6382389068603516


Processing epoch 01:  13%|█▎        | 635/4850 [03:01<19:37,  3.58it/s]

Epoch: 2, Loss: 2.990701675415039


Processing epoch 01:  13%|█▎        | 636/4850 [03:01<19:45,  3.56it/s]

Epoch: 2, Loss: 2.8899431228637695


Processing epoch 01:  13%|█▎        | 637/4850 [03:01<19:40,  3.57it/s]

Epoch: 2, Loss: 3.656135082244873


Processing epoch 01:  13%|█▎        | 638/4850 [03:02<19:42,  3.56it/s]

Epoch: 2, Loss: 2.806046962738037


Processing epoch 01:  13%|█▎        | 639/4850 [03:02<19:39,  3.57it/s]

Epoch: 2, Loss: 3.0143675804138184


Processing epoch 01:  13%|█▎        | 640/4850 [03:02<19:43,  3.56it/s]

Epoch: 2, Loss: 2.5152695178985596


Processing epoch 01:  13%|█▎        | 641/4850 [03:02<19:42,  3.56it/s]

Epoch: 2, Loss: 2.0940704345703125


Processing epoch 01:  13%|█▎        | 642/4850 [03:03<19:39,  3.57it/s]

Epoch: 2, Loss: 2.478536605834961


Processing epoch 01:  13%|█▎        | 643/4850 [03:03<19:39,  3.57it/s]

Epoch: 2, Loss: 2.2308285236358643


Processing epoch 01:  13%|█▎        | 644/4850 [03:03<19:40,  3.56it/s]

Epoch: 2, Loss: 2.212226152420044


Processing epoch 01:  13%|█▎        | 645/4850 [03:04<19:39,  3.56it/s]

Epoch: 2, Loss: 2.6280581951141357


Processing epoch 01:  13%|█▎        | 646/4850 [03:04<19:44,  3.55it/s]

Epoch: 2, Loss: 2.0457303524017334


Processing epoch 01:  13%|█▎        | 647/4850 [03:04<19:53,  3.52it/s]

Epoch: 2, Loss: 2.2734429836273193


Processing epoch 01:  13%|█▎        | 648/4850 [03:04<19:49,  3.53it/s]

Epoch: 2, Loss: 2.73691463470459


Processing epoch 01:  13%|█▎        | 649/4850 [03:05<20:09,  3.47it/s]

Epoch: 2, Loss: 2.927297353744507


Processing epoch 01:  13%|█▎        | 650/4850 [03:05<20:31,  3.41it/s]

Epoch: 2, Loss: 2.442686080932617


Processing epoch 01:  13%|█▎        | 651/4850 [03:05<20:31,  3.41it/s]

Epoch: 2, Loss: 2.465669631958008


Processing epoch 01:  13%|█▎        | 652/4850 [03:06<20:22,  3.43it/s]

Epoch: 2, Loss: 2.5734846591949463


Processing epoch 01:  13%|█▎        | 653/4850 [03:06<20:08,  3.47it/s]

Epoch: 2, Loss: 4.6612467765808105


Processing epoch 01:  13%|█▎        | 654/4850 [03:06<20:31,  3.41it/s]

Epoch: 2, Loss: 2.854551315307617


Processing epoch 01:  14%|█▎        | 655/4850 [03:07<20:59,  3.33it/s]

Epoch: 2, Loss: 2.9882330894470215


Processing epoch 01:  14%|█▎        | 656/4850 [03:07<21:00,  3.33it/s]

Epoch: 2, Loss: 2.719292640686035


Processing epoch 01:  14%|█▎        | 657/4850 [03:07<21:02,  3.32it/s]

Epoch: 2, Loss: 2.5579183101654053


Processing epoch 01:  14%|█▎        | 658/4850 [03:07<21:06,  3.31it/s]

Epoch: 2, Loss: 2.6666078567504883


Processing epoch 01:  14%|█▎        | 659/4850 [03:08<20:40,  3.38it/s]

Epoch: 2, Loss: 2.70997953414917


Processing epoch 01:  14%|█▎        | 660/4850 [03:08<20:25,  3.42it/s]

Epoch: 2, Loss: 2.7163631916046143


Processing epoch 01:  14%|█▎        | 661/4850 [03:08<20:28,  3.41it/s]

Epoch: 2, Loss: 2.195859432220459


Processing epoch 01:  14%|█▎        | 662/4850 [03:09<20:48,  3.35it/s]

Epoch: 2, Loss: 2.2857985496520996


Processing epoch 01:  14%|█▎        | 663/4850 [03:09<20:30,  3.40it/s]

Epoch: 2, Loss: 2.3393194675445557


Processing epoch 01:  14%|█▎        | 664/4850 [03:09<20:14,  3.45it/s]

Epoch: 2, Loss: 2.3610281944274902


Processing epoch 01:  14%|█▎        | 665/4850 [03:09<20:14,  3.45it/s]

Epoch: 2, Loss: 2.9579429626464844


Processing epoch 01:  14%|█▎        | 666/4850 [03:10<20:00,  3.48it/s]

Epoch: 2, Loss: 2.8772387504577637


Processing epoch 01:  14%|█▍        | 667/4850 [03:10<19:48,  3.52it/s]

Epoch: 2, Loss: 2.535252571105957


Processing epoch 01:  14%|█▍        | 668/4850 [03:10<19:43,  3.53it/s]

Epoch: 2, Loss: 2.174036979675293


Processing epoch 01:  14%|█▍        | 669/4850 [03:11<19:43,  3.53it/s]

Epoch: 2, Loss: 2.803527593612671


Processing epoch 01:  14%|█▍        | 670/4850 [03:11<19:39,  3.54it/s]

Epoch: 2, Loss: 2.648890495300293


Processing epoch 01:  14%|█▍        | 671/4850 [03:11<19:32,  3.56it/s]

Epoch: 2, Loss: 3.1086745262145996


Processing epoch 01:  14%|█▍        | 672/4850 [03:11<19:30,  3.57it/s]

Epoch: 2, Loss: 2.672079086303711


Processing epoch 01:  14%|█▍        | 673/4850 [03:12<19:35,  3.55it/s]

Epoch: 2, Loss: 2.859589099884033


Processing epoch 01:  14%|█▍        | 674/4850 [03:12<19:39,  3.54it/s]

Epoch: 2, Loss: 2.452730417251587


Processing epoch 01:  14%|█▍        | 675/4850 [03:12<19:40,  3.54it/s]

Epoch: 2, Loss: 3.346829414367676


Processing epoch 01:  14%|█▍        | 676/4850 [03:13<19:55,  3.49it/s]

Epoch: 2, Loss: 2.6672098636627197


Processing epoch 01:  14%|█▍        | 677/4850 [03:13<19:46,  3.52it/s]

Epoch: 2, Loss: 3.834395170211792


Processing epoch 01:  14%|█▍        | 678/4850 [03:13<19:42,  3.53it/s]

Epoch: 2, Loss: 2.300935745239258


Processing epoch 01:  14%|█▍        | 679/4850 [03:13<19:35,  3.55it/s]

Epoch: 2, Loss: 3.1758556365966797


Processing epoch 01:  14%|█▍        | 680/4850 [03:14<19:37,  3.54it/s]

Epoch: 2, Loss: 2.111649990081787


Processing epoch 01:  14%|█▍        | 681/4850 [03:14<19:37,  3.54it/s]

Epoch: 2, Loss: 2.4082744121551514


Processing epoch 01:  14%|█▍        | 682/4850 [03:14<19:33,  3.55it/s]

Epoch: 2, Loss: 1.9276764392852783


Processing epoch 01:  14%|█▍        | 683/4850 [03:15<19:27,  3.57it/s]

Epoch: 2, Loss: 3.4102416038513184


Processing epoch 01:  14%|█▍        | 684/4850 [03:15<19:27,  3.57it/s]

Epoch: 2, Loss: 2.807941436767578


Processing epoch 01:  14%|█▍        | 685/4850 [03:15<19:28,  3.57it/s]

Epoch: 2, Loss: 2.882002353668213


Processing epoch 01:  14%|█▍        | 686/4850 [03:15<19:26,  3.57it/s]

Epoch: 2, Loss: 2.4840378761291504


Processing epoch 01:  14%|█▍        | 687/4850 [03:16<19:35,  3.54it/s]

Epoch: 2, Loss: 2.5397489070892334


Processing epoch 01:  14%|█▍        | 688/4850 [03:16<19:32,  3.55it/s]

Epoch: 2, Loss: 2.406301259994507


Processing epoch 01:  14%|█▍        | 689/4850 [03:16<19:34,  3.54it/s]

Epoch: 2, Loss: 2.772034168243408


Processing epoch 01:  14%|█▍        | 690/4850 [03:17<19:37,  3.53it/s]

Epoch: 2, Loss: 2.857860565185547


Processing epoch 01:  14%|█▍        | 691/4850 [03:17<19:42,  3.52it/s]

Epoch: 2, Loss: 3.5462117195129395


Processing epoch 01:  14%|█▍        | 692/4850 [03:17<19:37,  3.53it/s]

Epoch: 2, Loss: 2.540586471557617


Processing epoch 01:  14%|█▍        | 693/4850 [03:17<19:38,  3.53it/s]

Epoch: 2, Loss: 2.749438762664795


Processing epoch 01:  14%|█▍        | 694/4850 [03:18<19:33,  3.54it/s]

Epoch: 2, Loss: 2.644533157348633


Processing epoch 01:  14%|█▍        | 695/4850 [03:18<19:30,  3.55it/s]

Epoch: 2, Loss: 2.852963447570801


Processing epoch 01:  14%|█▍        | 696/4850 [03:18<19:36,  3.53it/s]

Epoch: 2, Loss: 2.707606315612793


Processing epoch 01:  14%|█▍        | 697/4850 [03:19<19:31,  3.54it/s]

Epoch: 2, Loss: 2.999541759490967


Processing epoch 01:  14%|█▍        | 698/4850 [03:19<20:03,  3.45it/s]

Epoch: 2, Loss: 2.2030303478240967


Processing epoch 01:  14%|█▍        | 699/4850 [03:19<20:07,  3.44it/s]

Epoch: 2, Loss: 2.4209132194519043


Processing epoch 01:  14%|█▍        | 700/4850 [03:19<20:19,  3.40it/s]

Epoch: 2, Loss: 2.4731061458587646


Processing epoch 01:  14%|█▍        | 701/4850 [03:20<20:25,  3.39it/s]

Epoch: 2, Loss: 2.6813879013061523


Processing epoch 01:  14%|█▍        | 702/4850 [03:20<20:27,  3.38it/s]

Epoch: 2, Loss: 2.7098917961120605


Processing epoch 01:  14%|█▍        | 703/4850 [03:20<20:17,  3.41it/s]

Epoch: 2, Loss: 2.9635062217712402


Processing epoch 01:  15%|█▍        | 704/4850 [03:21<20:11,  3.42it/s]

Epoch: 2, Loss: 4.060814380645752


Processing epoch 01:  15%|█▍        | 705/4850 [03:21<20:24,  3.39it/s]

Epoch: 2, Loss: 2.8633241653442383


Processing epoch 01:  15%|█▍        | 706/4850 [03:21<20:36,  3.35it/s]

Epoch: 2, Loss: 3.199504852294922


Processing epoch 01:  15%|█▍        | 707/4850 [03:21<20:41,  3.34it/s]

Epoch: 2, Loss: 2.2328908443450928


Processing epoch 01:  15%|█▍        | 708/4850 [03:22<20:40,  3.34it/s]

Epoch: 2, Loss: 2.9246857166290283


Processing epoch 01:  15%|█▍        | 709/4850 [03:22<20:30,  3.37it/s]

Epoch: 2, Loss: 4.2712907791137695


Processing epoch 01:  15%|█▍        | 710/4850 [03:22<20:35,  3.35it/s]

Epoch: 2, Loss: 2.7711520195007324


Processing epoch 01:  15%|█▍        | 711/4850 [03:23<20:13,  3.41it/s]

Epoch: 2, Loss: 2.4975576400756836


Processing epoch 01:  15%|█▍        | 712/4850 [03:23<19:57,  3.46it/s]

Epoch: 2, Loss: 2.4001991748809814


Processing epoch 01:  15%|█▍        | 713/4850 [03:23<19:45,  3.49it/s]

Epoch: 2, Loss: 2.7434492111206055


Processing epoch 01:  15%|█▍        | 714/4850 [03:24<19:36,  3.52it/s]

Epoch: 2, Loss: 2.2135403156280518


Processing epoch 01:  15%|█▍        | 715/4850 [03:24<19:27,  3.54it/s]

Epoch: 2, Loss: 2.8263909816741943


Processing epoch 01:  15%|█▍        | 716/4850 [03:24<19:37,  3.51it/s]

Epoch: 2, Loss: 2.503669500350952


Processing epoch 01:  15%|█▍        | 717/4850 [03:24<19:28,  3.54it/s]

Epoch: 2, Loss: 2.0776140689849854


Processing epoch 01:  15%|█▍        | 718/4850 [03:25<19:23,  3.55it/s]

Epoch: 2, Loss: 2.4959566593170166


Processing epoch 01:  15%|█▍        | 719/4850 [03:25<19:24,  3.55it/s]

Epoch: 2, Loss: 2.7576839923858643


Processing epoch 01:  15%|█▍        | 720/4850 [03:25<19:20,  3.56it/s]

Epoch: 2, Loss: 2.7241768836975098


Processing epoch 01:  15%|█▍        | 721/4850 [03:25<19:16,  3.57it/s]

Epoch: 2, Loss: 3.04913067817688


Processing epoch 01:  15%|█▍        | 722/4850 [03:26<19:17,  3.57it/s]

Epoch: 2, Loss: 2.7581849098205566


Processing epoch 01:  15%|█▍        | 723/4850 [03:26<19:22,  3.55it/s]

Epoch: 2, Loss: 2.3759546279907227


Processing epoch 01:  15%|█▍        | 724/4850 [03:26<19:28,  3.53it/s]

Epoch: 2, Loss: 2.491445541381836


Processing epoch 01:  15%|█▍        | 725/4850 [03:27<19:31,  3.52it/s]

Epoch: 2, Loss: 2.5663976669311523


Processing epoch 01:  15%|█▍        | 726/4850 [03:27<19:30,  3.52it/s]

Epoch: 2, Loss: 3.2679972648620605


Processing epoch 01:  15%|█▍        | 727/4850 [03:27<19:37,  3.50it/s]

Epoch: 2, Loss: 2.538405418395996


Processing epoch 01:  15%|█▌        | 728/4850 [03:27<19:31,  3.52it/s]

Epoch: 2, Loss: 2.6524622440338135


Processing epoch 01:  15%|█▌        | 729/4850 [03:28<19:25,  3.53it/s]

Epoch: 2, Loss: 3.00642466545105


Processing epoch 01:  15%|█▌        | 730/4850 [03:28<19:12,  3.57it/s]

Epoch: 2, Loss: 2.963515281677246


Processing epoch 01:  15%|█▌        | 731/4850 [03:28<19:11,  3.58it/s]

Epoch: 2, Loss: 2.4381937980651855


Processing epoch 01:  15%|█▌        | 732/4850 [03:29<19:08,  3.58it/s]

Epoch: 2, Loss: 3.0825366973876953


Processing epoch 01:  15%|█▌        | 733/4850 [03:29<19:14,  3.57it/s]

Epoch: 2, Loss: 2.6278347969055176


Processing epoch 01:  15%|█▌        | 734/4850 [03:29<19:14,  3.56it/s]

Epoch: 2, Loss: 2.583556652069092


Processing epoch 01:  15%|█▌        | 735/4850 [03:29<19:13,  3.57it/s]

Epoch: 2, Loss: 2.8085885047912598


Processing epoch 01:  15%|█▌        | 736/4850 [03:30<19:09,  3.58it/s]

Epoch: 2, Loss: 2.825742483139038


Processing epoch 01:  15%|█▌        | 737/4850 [03:30<19:09,  3.58it/s]

Epoch: 2, Loss: 2.8137965202331543


Processing epoch 01:  15%|█▌        | 738/4850 [03:30<19:15,  3.56it/s]

Epoch: 2, Loss: 2.3927390575408936


Processing epoch 01:  15%|█▌        | 739/4850 [03:31<19:14,  3.56it/s]

Epoch: 2, Loss: 2.487423896789551


Processing epoch 01:  15%|█▌        | 740/4850 [03:31<19:15,  3.56it/s]

Epoch: 2, Loss: 2.5566940307617188


Processing epoch 01:  15%|█▌        | 741/4850 [03:31<19:14,  3.56it/s]

Epoch: 2, Loss: 2.655534267425537


Processing epoch 01:  15%|█▌        | 742/4850 [03:31<19:12,  3.56it/s]

Epoch: 2, Loss: 2.679166793823242


Processing epoch 01:  15%|█▌        | 743/4850 [03:32<19:06,  3.58it/s]

Epoch: 2, Loss: 3.0487844944000244


Processing epoch 01:  15%|█▌        | 744/4850 [03:32<19:06,  3.58it/s]

Epoch: 2, Loss: 2.3157453536987305


Processing epoch 01:  15%|█▌        | 745/4850 [03:32<19:12,  3.56it/s]

Epoch: 2, Loss: 2.781632423400879


Processing epoch 01:  15%|█▌        | 746/4850 [03:32<19:16,  3.55it/s]

Epoch: 2, Loss: 2.4678869247436523


Processing epoch 01:  15%|█▌        | 747/4850 [03:33<19:36,  3.49it/s]

Epoch: 2, Loss: 2.0825722217559814


Processing epoch 01:  15%|█▌        | 748/4850 [03:33<19:42,  3.47it/s]

Epoch: 2, Loss: 2.704925537109375


Processing epoch 01:  15%|█▌        | 749/4850 [03:33<19:51,  3.44it/s]

Epoch: 2, Loss: 2.048659563064575


Processing epoch 01:  15%|█▌        | 750/4850 [03:34<19:53,  3.43it/s]

Epoch: 2, Loss: 3.0808944702148438


Processing epoch 01:  15%|█▌        | 751/4850 [03:34<19:57,  3.42it/s]

Epoch: 2, Loss: 2.237773895263672


Processing epoch 01:  16%|█▌        | 752/4850 [03:34<19:52,  3.44it/s]

Epoch: 2, Loss: 2.505560874938965


Processing epoch 01:  16%|█▌        | 753/4850 [03:35<19:51,  3.44it/s]

Epoch: 2, Loss: 3.2218520641326904


Processing epoch 01:  16%|█▌        | 754/4850 [03:35<20:05,  3.40it/s]

Epoch: 2, Loss: 2.96197772026062


Processing epoch 01:  16%|█▌        | 755/4850 [03:35<20:15,  3.37it/s]

Epoch: 2, Loss: 2.3810276985168457


Processing epoch 01:  16%|█▌        | 756/4850 [03:35<20:22,  3.35it/s]

Epoch: 2, Loss: 2.820559501647949


Processing epoch 01:  16%|█▌        | 757/4850 [03:36<20:31,  3.32it/s]

Epoch: 2, Loss: 2.3203935623168945


Processing epoch 01:  16%|█▌        | 758/4850 [03:36<20:31,  3.32it/s]

Epoch: 2, Loss: 2.898491859436035


Processing epoch 01:  16%|█▌        | 759/4850 [03:36<20:06,  3.39it/s]

Epoch: 2, Loss: 3.4488701820373535


Processing epoch 01:  16%|█▌        | 760/4850 [03:37<19:51,  3.43it/s]

Epoch: 2, Loss: 3.1276538372039795


Processing epoch 01:  16%|█▌        | 761/4850 [03:37<19:41,  3.46it/s]

Epoch: 2, Loss: 2.2707481384277344


Processing epoch 01:  16%|█▌        | 762/4850 [03:37<19:33,  3.48it/s]

Epoch: 2, Loss: 3.0764079093933105


Processing epoch 01:  16%|█▌        | 763/4850 [03:37<19:34,  3.48it/s]

Epoch: 2, Loss: 2.684314489364624


Processing epoch 01:  16%|█▌        | 764/4850 [03:38<19:31,  3.49it/s]

Epoch: 2, Loss: 3.1359262466430664


Processing epoch 01:  16%|█▌        | 765/4850 [03:38<19:24,  3.51it/s]

Epoch: 2, Loss: 2.9880480766296387


Processing epoch 01:  16%|█▌        | 766/4850 [03:38<19:21,  3.52it/s]

Epoch: 2, Loss: 2.5489847660064697


Processing epoch 01:  16%|█▌        | 767/4850 [03:39<19:20,  3.52it/s]

Epoch: 2, Loss: 3.0516774654388428


Processing epoch 01:  16%|█▌        | 768/4850 [03:39<19:13,  3.54it/s]

Epoch: 2, Loss: 2.799351692199707


Processing epoch 01:  16%|█▌        | 769/4850 [03:39<19:11,  3.54it/s]

Epoch: 2, Loss: 2.5269546508789062


Processing epoch 01:  16%|█▌        | 770/4850 [03:39<19:04,  3.56it/s]

Epoch: 2, Loss: 2.843183994293213


Processing epoch 01:  16%|█▌        | 771/4850 [03:40<19:01,  3.57it/s]

Epoch: 2, Loss: 3.324711322784424


Processing epoch 01:  16%|█▌        | 772/4850 [03:40<19:05,  3.56it/s]

Epoch: 2, Loss: 3.2434492111206055


Processing epoch 01:  16%|█▌        | 773/4850 [03:40<19:02,  3.57it/s]

Epoch: 2, Loss: 3.061619281768799


Processing epoch 01:  16%|█▌        | 774/4850 [03:41<19:15,  3.53it/s]

Epoch: 2, Loss: 2.391695261001587


Processing epoch 01:  16%|█▌        | 775/4850 [03:41<19:14,  3.53it/s]

Epoch: 2, Loss: 2.794067859649658


Processing epoch 01:  16%|█▌        | 776/4850 [03:41<19:13,  3.53it/s]

Epoch: 2, Loss: 2.8785805702209473


Processing epoch 01:  16%|█▌        | 777/4850 [03:41<19:10,  3.54it/s]

Epoch: 2, Loss: 2.8856029510498047


Processing epoch 01:  16%|█▌        | 778/4850 [03:42<19:09,  3.54it/s]

Epoch: 2, Loss: 2.5801045894622803


Processing epoch 01:  16%|█▌        | 779/4850 [03:42<19:08,  3.54it/s]

Epoch: 2, Loss: 2.4294073581695557


Processing epoch 01:  16%|█▌        | 780/4850 [03:42<19:10,  3.54it/s]

Epoch: 2, Loss: 2.9507741928100586


Processing epoch 01:  16%|█▌        | 781/4850 [03:43<19:13,  3.53it/s]

Epoch: 2, Loss: 2.9874720573425293


Processing epoch 01:  16%|█▌        | 782/4850 [03:43<19:13,  3.53it/s]

Epoch: 2, Loss: 1.9222640991210938


Processing epoch 01:  16%|█▌        | 783/4850 [03:43<19:12,  3.53it/s]

Epoch: 2, Loss: 2.3243980407714844


Processing epoch 01:  16%|█▌        | 784/4850 [03:43<19:12,  3.53it/s]

Epoch: 2, Loss: 2.53019380569458


Processing epoch 01:  16%|█▌        | 785/4850 [03:44<19:20,  3.50it/s]

Epoch: 2, Loss: 3.1779608726501465


Processing epoch 01:  16%|█▌        | 786/4850 [03:44<19:16,  3.51it/s]

Epoch: 2, Loss: 2.777228355407715


Processing epoch 01:  16%|█▌        | 787/4850 [03:44<19:14,  3.52it/s]

Epoch: 2, Loss: 2.380270004272461


Processing epoch 01:  16%|█▌        | 788/4850 [03:45<19:13,  3.52it/s]

Epoch: 2, Loss: 2.7214627265930176


Processing epoch 01:  16%|█▋        | 789/4850 [03:45<19:11,  3.53it/s]

Epoch: 2, Loss: 2.7117514610290527


Processing epoch 01:  16%|█▋        | 790/4850 [03:45<18:58,  3.56it/s]

Epoch: 2, Loss: 4.400046348571777


Processing epoch 01:  16%|█▋        | 791/4850 [03:45<18:52,  3.59it/s]

Epoch: 2, Loss: 3.4430091381073


Processing epoch 01:  16%|█▋        | 792/4850 [03:46<18:52,  3.58it/s]

Epoch: 2, Loss: 2.6217570304870605


Processing epoch 01:  16%|█▋        | 793/4850 [03:46<18:55,  3.57it/s]

Epoch: 2, Loss: 3.046764850616455


Processing epoch 01:  16%|█▋        | 794/4850 [03:46<19:09,  3.53it/s]

Epoch: 2, Loss: 2.160825490951538


Processing epoch 01:  16%|█▋        | 795/4850 [03:47<19:18,  3.50it/s]

Epoch: 2, Loss: 2.787529945373535


Processing epoch 01:  16%|█▋        | 796/4850 [03:47<19:32,  3.46it/s]

Epoch: 2, Loss: 2.1551692485809326


Processing epoch 01:  16%|█▋        | 797/4850 [03:47<19:42,  3.43it/s]

Epoch: 2, Loss: 2.7353081703186035


Processing epoch 01:  16%|█▋        | 798/4850 [03:47<19:47,  3.41it/s]

Epoch: 2, Loss: 3.0806517601013184


Processing epoch 01:  16%|█▋        | 799/4850 [03:48<19:42,  3.43it/s]

Epoch: 2, Loss: 2.3731870651245117


Processing epoch 01:  16%|█▋        | 800/4850 [03:48<19:34,  3.45it/s]

Epoch: 2, Loss: 3.006396770477295


Processing epoch 01:  17%|█▋        | 801/4850 [03:48<19:45,  3.42it/s]

Epoch: 2, Loss: 2.4873735904693604


Processing epoch 01:  17%|█▋        | 802/4850 [03:49<19:45,  3.41it/s]

Epoch: 2, Loss: 3.001654863357544


Processing epoch 01:  17%|█▋        | 803/4850 [03:49<19:52,  3.39it/s]

Epoch: 2, Loss: 2.475149631500244


Processing epoch 01:  17%|█▋        | 804/4850 [03:49<19:50,  3.40it/s]

Epoch: 2, Loss: 2.801384687423706


Processing epoch 01:  17%|█▋        | 805/4850 [03:49<19:42,  3.42it/s]

Epoch: 2, Loss: 3.2419707775115967


Processing epoch 01:  17%|█▋        | 806/4850 [03:50<19:33,  3.45it/s]

Epoch: 2, Loss: 2.4935407638549805


Processing epoch 01:  17%|█▋        | 807/4850 [03:50<19:23,  3.47it/s]

Epoch: 2, Loss: 2.548917055130005


Processing epoch 01:  17%|█▋        | 808/4850 [03:50<19:39,  3.43it/s]

Epoch: 2, Loss: 2.0837807655334473


Processing epoch 01:  17%|█▋        | 809/4850 [03:51<19:27,  3.46it/s]

Epoch: 2, Loss: 2.701789379119873


Processing epoch 01:  17%|█▋        | 810/4850 [03:51<19:18,  3.49it/s]

Epoch: 2, Loss: 1.9430150985717773


Processing epoch 01:  17%|█▋        | 811/4850 [03:51<19:19,  3.48it/s]

Epoch: 2, Loss: 2.363065719604492


Processing epoch 01:  17%|█▋        | 812/4850 [03:51<19:12,  3.50it/s]

Epoch: 2, Loss: 2.5948596000671387


Processing epoch 01:  17%|█▋        | 813/4850 [03:52<19:03,  3.53it/s]

Epoch: 2, Loss: 3.299665927886963


Processing epoch 01:  17%|█▋        | 814/4850 [03:52<19:06,  3.52it/s]

Epoch: 2, Loss: 2.7934021949768066


Processing epoch 01:  17%|█▋        | 815/4850 [03:52<19:04,  3.52it/s]

Epoch: 2, Loss: 2.2249975204467773


Processing epoch 01:  17%|█▋        | 816/4850 [03:53<18:59,  3.54it/s]

Epoch: 2, Loss: 3.483391761779785


Processing epoch 01:  17%|█▋        | 817/4850 [03:53<19:02,  3.53it/s]

Epoch: 2, Loss: 2.1871700286865234


Processing epoch 01:  17%|█▋        | 818/4850 [03:53<19:04,  3.52it/s]

Epoch: 2, Loss: 3.0371367931365967


Processing epoch 01:  17%|█▋        | 819/4850 [03:53<18:57,  3.55it/s]

Epoch: 2, Loss: 2.763521671295166


Processing epoch 01:  17%|█▋        | 820/4850 [03:54<18:51,  3.56it/s]

Epoch: 2, Loss: 2.9357247352600098


Processing epoch 01:  17%|█▋        | 821/4850 [03:54<18:44,  3.58it/s]

Epoch: 2, Loss: 2.5578787326812744


Processing epoch 01:  17%|█▋        | 822/4850 [03:54<18:49,  3.57it/s]

Epoch: 2, Loss: 2.250856399536133


Processing epoch 01:  17%|█▋        | 823/4850 [03:55<18:48,  3.57it/s]

Epoch: 2, Loss: 3.424811840057373


Processing epoch 01:  17%|█▋        | 824/4850 [03:55<18:50,  3.56it/s]

Epoch: 2, Loss: 2.6451027393341064


Processing epoch 01:  17%|█▋        | 825/4850 [03:55<18:58,  3.54it/s]

Epoch: 2, Loss: 2.9649171829223633


Processing epoch 01:  17%|█▋        | 826/4850 [03:55<18:57,  3.54it/s]

Epoch: 2, Loss: 2.570573329925537


Processing epoch 01:  17%|█▋        | 827/4850 [03:56<18:56,  3.54it/s]

Epoch: 2, Loss: 2.9072608947753906


Processing epoch 01:  17%|█▋        | 828/4850 [03:56<18:55,  3.54it/s]

Epoch: 2, Loss: 2.4433062076568604


Processing epoch 01:  17%|█▋        | 829/4850 [03:56<18:52,  3.55it/s]

Epoch: 2, Loss: 2.5778512954711914


Processing epoch 01:  17%|█▋        | 830/4850 [03:57<18:48,  3.56it/s]

Epoch: 2, Loss: 2.6991066932678223


Processing epoch 01:  17%|█▋        | 831/4850 [03:57<18:53,  3.55it/s]

Epoch: 2, Loss: 3.164726734161377


Processing epoch 01:  17%|█▋        | 832/4850 [03:57<18:51,  3.55it/s]

Epoch: 2, Loss: 2.572165012359619


Processing epoch 01:  17%|█▋        | 833/4850 [03:57<18:56,  3.53it/s]

Epoch: 2, Loss: 2.2042925357818604


Processing epoch 01:  17%|█▋        | 834/4850 [03:58<19:04,  3.51it/s]

Epoch: 2, Loss: 3.8014326095581055


Processing epoch 01:  17%|█▋        | 835/4850 [03:58<19:04,  3.51it/s]

Epoch: 2, Loss: 3.381101131439209


Processing epoch 01:  17%|█▋        | 836/4850 [03:58<19:06,  3.50it/s]

Epoch: 2, Loss: 2.5128417015075684


Processing epoch 01:  17%|█▋        | 837/4850 [03:59<19:01,  3.51it/s]

Epoch: 2, Loss: 2.7592973709106445


Processing epoch 01:  17%|█▋        | 838/4850 [03:59<19:00,  3.52it/s]

Epoch: 2, Loss: 2.714989185333252


Processing epoch 01:  17%|█▋        | 839/4850 [03:59<18:54,  3.53it/s]

Epoch: 2, Loss: 2.7063021659851074


Processing epoch 01:  17%|█▋        | 840/4850 [03:59<18:55,  3.53it/s]

Epoch: 2, Loss: 3.296152114868164


Processing epoch 01:  17%|█▋        | 841/4850 [04:00<18:50,  3.54it/s]

Epoch: 2, Loss: 2.0261616706848145


Processing epoch 01:  17%|█▋        | 842/4850 [04:00<18:47,  3.56it/s]

Epoch: 2, Loss: 3.265679359436035


Processing epoch 01:  17%|█▋        | 843/4850 [04:00<18:51,  3.54it/s]

Epoch: 2, Loss: 2.3395609855651855


Processing epoch 01:  17%|█▋        | 844/4850 [04:01<18:51,  3.54it/s]

Epoch: 2, Loss: 2.480255365371704


Processing epoch 01:  17%|█▋        | 845/4850 [04:01<19:13,  3.47it/s]

Epoch: 2, Loss: 3.054739236831665


Processing epoch 01:  17%|█▋        | 846/4850 [04:01<19:21,  3.45it/s]

Epoch: 2, Loss: 2.092709541320801


Processing epoch 01:  17%|█▋        | 847/4850 [04:01<19:21,  3.45it/s]

Epoch: 2, Loss: 2.611539363861084


Processing epoch 01:  17%|█▋        | 848/4850 [04:02<19:10,  3.48it/s]

Epoch: 2, Loss: 2.6051812171936035


Processing epoch 01:  18%|█▊        | 849/4850 [04:02<19:17,  3.46it/s]

Epoch: 2, Loss: 1.9875218868255615


Processing epoch 01:  18%|█▊        | 850/4850 [04:02<19:31,  3.41it/s]

Epoch: 2, Loss: 2.9490151405334473


Processing epoch 01:  18%|█▊        | 851/4850 [04:03<19:54,  3.35it/s]

Epoch: 2, Loss: 2.5600953102111816


Processing epoch 01:  18%|█▊        | 852/4850 [04:03<20:10,  3.30it/s]

Epoch: 2, Loss: 2.6111984252929688


Processing epoch 01:  18%|█▊        | 853/4850 [04:03<19:44,  3.37it/s]

Epoch: 2, Loss: 2.781843900680542


Processing epoch 01:  18%|█▊        | 854/4850 [04:03<19:39,  3.39it/s]

Epoch: 2, Loss: 2.8555994033813477


Processing epoch 01:  18%|█▊        | 855/4850 [04:04<19:47,  3.36it/s]

Epoch: 2, Loss: 2.574866533279419


Processing epoch 01:  18%|█▊        | 856/4850 [04:04<19:32,  3.41it/s]

Epoch: 2, Loss: 2.755061149597168


Processing epoch 01:  18%|█▊        | 857/4850 [04:04<19:51,  3.35it/s]

Epoch: 2, Loss: 2.5745439529418945


Processing epoch 01:  18%|█▊        | 858/4850 [04:05<19:58,  3.33it/s]

Epoch: 2, Loss: 2.823974609375


Processing epoch 01:  18%|█▊        | 859/4850 [04:05<19:36,  3.39it/s]

Epoch: 2, Loss: 2.9912514686584473


Processing epoch 01:  18%|█▊        | 860/4850 [04:05<19:20,  3.44it/s]

Epoch: 2, Loss: 2.405768871307373


Processing epoch 01:  18%|█▊        | 861/4850 [04:06<19:10,  3.47it/s]

Epoch: 2, Loss: 2.343616485595703


Processing epoch 01:  18%|█▊        | 862/4850 [04:06<19:00,  3.50it/s]

Epoch: 2, Loss: 2.834970474243164


Processing epoch 01:  18%|█▊        | 863/4850 [04:06<18:54,  3.52it/s]

Epoch: 2, Loss: 2.1372742652893066


Processing epoch 01:  18%|█▊        | 864/4850 [04:06<18:56,  3.51it/s]

Epoch: 2, Loss: 2.6235909461975098


Processing epoch 01:  18%|█▊        | 865/4850 [04:07<19:02,  3.49it/s]

Epoch: 2, Loss: 2.7295641899108887


Processing epoch 01:  18%|█▊        | 866/4850 [04:07<18:56,  3.51it/s]

Epoch: 2, Loss: 2.9994688034057617


Processing epoch 01:  18%|█▊        | 867/4850 [04:07<18:46,  3.54it/s]

Epoch: 2, Loss: 3.38399076461792


Processing epoch 01:  18%|█▊        | 868/4850 [04:07<18:38,  3.56it/s]

Epoch: 2, Loss: 2.915421485900879


Processing epoch 01:  18%|█▊        | 869/4850 [04:08<18:39,  3.56it/s]

Epoch: 2, Loss: 2.5979068279266357


Processing epoch 01:  18%|█▊        | 870/4850 [04:08<18:37,  3.56it/s]

Epoch: 2, Loss: 2.1100387573242188


Processing epoch 01:  18%|█▊        | 871/4850 [04:08<18:38,  3.56it/s]

Epoch: 2, Loss: 2.178999185562134


Processing epoch 01:  18%|█▊        | 872/4850 [04:09<18:36,  3.56it/s]

Epoch: 2, Loss: 2.619840145111084


Processing epoch 01:  18%|█▊        | 873/4850 [04:09<18:37,  3.56it/s]

Epoch: 2, Loss: 2.288987874984741


Processing epoch 01:  18%|█▊        | 874/4850 [04:09<18:38,  3.55it/s]

Epoch: 2, Loss: 2.866635322570801


Processing epoch 01:  18%|█▊        | 875/4850 [04:09<18:35,  3.56it/s]

Epoch: 2, Loss: 2.585620880126953


Processing epoch 01:  18%|█▊        | 876/4850 [04:10<18:44,  3.53it/s]

Epoch: 2, Loss: 2.6444578170776367


Processing epoch 01:  18%|█▊        | 877/4850 [04:10<18:37,  3.56it/s]

Epoch: 2, Loss: 2.240234375


Processing epoch 01:  18%|█▊        | 878/4850 [04:10<18:38,  3.55it/s]

Epoch: 2, Loss: 2.6922073364257812


Processing epoch 01:  18%|█▊        | 879/4850 [04:11<18:34,  3.56it/s]

Epoch: 2, Loss: 3.019711971282959


Processing epoch 01:  18%|█▊        | 880/4850 [04:11<18:33,  3.57it/s]

Epoch: 2, Loss: 3.8931922912597656


Processing epoch 01:  18%|█▊        | 881/4850 [04:11<18:32,  3.57it/s]

Epoch: 2, Loss: 2.881338119506836


Processing epoch 01:  18%|█▊        | 882/4850 [04:11<18:31,  3.57it/s]

Epoch: 2, Loss: 2.403634786605835


Processing epoch 01:  18%|█▊        | 883/4850 [04:12<18:36,  3.55it/s]

Epoch: 2, Loss: 2.1742827892303467


Processing epoch 01:  18%|█▊        | 884/4850 [04:12<18:35,  3.56it/s]

Epoch: 2, Loss: 2.801906108856201


Processing epoch 01:  18%|█▊        | 885/4850 [04:12<18:34,  3.56it/s]

Epoch: 2, Loss: 2.065521240234375


Processing epoch 01:  18%|█▊        | 886/4850 [04:13<18:34,  3.56it/s]

Epoch: 2, Loss: 2.4574849605560303


Processing epoch 01:  18%|█▊        | 887/4850 [04:13<18:34,  3.56it/s]

Epoch: 2, Loss: 3.7497968673706055


Processing epoch 01:  18%|█▊        | 888/4850 [04:13<18:32,  3.56it/s]

Epoch: 2, Loss: 2.0703487396240234


Processing epoch 01:  18%|█▊        | 889/4850 [04:13<18:31,  3.56it/s]

Epoch: 2, Loss: 2.7122559547424316


Processing epoch 01:  18%|█▊        | 890/4850 [04:14<18:25,  3.58it/s]

Epoch: 2, Loss: 2.7773399353027344


Processing epoch 01:  18%|█▊        | 891/4850 [04:14<18:28,  3.57it/s]

Epoch: 2, Loss: 3.0688366889953613


Processing epoch 01:  18%|█▊        | 892/4850 [04:14<18:27,  3.57it/s]

Epoch: 2, Loss: 2.398333787918091


Processing epoch 01:  18%|█▊        | 893/4850 [04:15<18:31,  3.56it/s]

Epoch: 2, Loss: 3.1836187839508057


Processing epoch 01:  18%|█▊        | 894/4850 [04:15<18:36,  3.54it/s]

Epoch: 2, Loss: 2.4742465019226074


Processing epoch 01:  18%|█▊        | 895/4850 [04:15<18:43,  3.52it/s]

Epoch: 2, Loss: 2.045091152191162


Processing epoch 01:  18%|█▊        | 896/4850 [04:15<19:01,  3.46it/s]

Epoch: 2, Loss: 2.500255584716797


Processing epoch 01:  18%|█▊        | 897/4850 [04:16<18:53,  3.49it/s]

Epoch: 2, Loss: 2.7011451721191406


Processing epoch 01:  19%|█▊        | 898/4850 [04:16<19:11,  3.43it/s]

Epoch: 2, Loss: 2.6188149452209473


Processing epoch 01:  19%|█▊        | 899/4850 [04:16<19:22,  3.40it/s]

Epoch: 2, Loss: 2.2755990028381348


Processing epoch 01:  19%|█▊        | 900/4850 [04:17<19:14,  3.42it/s]

Epoch: 2, Loss: 2.615023136138916


Processing epoch 01:  19%|█▊        | 901/4850 [04:17<19:21,  3.40it/s]

Epoch: 2, Loss: 2.936710834503174


Processing epoch 01:  19%|█▊        | 902/4850 [04:17<19:20,  3.40it/s]

Epoch: 2, Loss: 2.2783203125


Processing epoch 01:  19%|█▊        | 903/4850 [04:17<19:26,  3.38it/s]

Epoch: 2, Loss: 3.103039264678955


Processing epoch 01:  19%|█▊        | 904/4850 [04:18<19:25,  3.39it/s]

Epoch: 2, Loss: 3.158114194869995


Processing epoch 01:  19%|█▊        | 905/4850 [04:18<19:28,  3.38it/s]

Epoch: 2, Loss: 2.839822769165039


Processing epoch 01:  19%|█▊        | 906/4850 [04:18<19:29,  3.37it/s]

Epoch: 2, Loss: 2.799839973449707


Processing epoch 01:  19%|█▊        | 907/4850 [04:19<19:31,  3.37it/s]

Epoch: 2, Loss: 2.496635913848877


Processing epoch 01:  19%|█▊        | 908/4850 [04:19<19:34,  3.36it/s]

Epoch: 2, Loss: 2.2457990646362305


Processing epoch 01:  19%|█▊        | 909/4850 [04:19<19:40,  3.34it/s]

Epoch: 2, Loss: 3.1256253719329834


Processing epoch 01:  19%|█▉        | 910/4850 [04:20<19:32,  3.36it/s]

Epoch: 2, Loss: 2.3438262939453125


Processing epoch 01:  19%|█▉        | 911/4850 [04:20<19:34,  3.35it/s]

Epoch: 2, Loss: 2.6384153366088867


Processing epoch 01:  19%|█▉        | 912/4850 [04:20<19:43,  3.33it/s]

Epoch: 2, Loss: 2.3195085525512695


Processing epoch 01:  19%|█▉        | 913/4850 [04:20<19:35,  3.35it/s]

Epoch: 2, Loss: 2.8130717277526855


Processing epoch 01:  19%|█▉        | 914/4850 [04:21<19:42,  3.33it/s]

Epoch: 2, Loss: 3.01271915435791


Processing epoch 01:  19%|█▉        | 915/4850 [04:21<19:38,  3.34it/s]

Epoch: 2, Loss: 2.18880558013916


Processing epoch 01:  19%|█▉        | 916/4850 [04:21<19:32,  3.36it/s]

Epoch: 2, Loss: 2.9601521492004395


Processing epoch 01:  19%|█▉        | 917/4850 [04:22<19:39,  3.34it/s]

Epoch: 2, Loss: 2.655637502670288


Processing epoch 01:  19%|█▉        | 918/4850 [04:22<19:17,  3.40it/s]

Epoch: 2, Loss: 2.3375844955444336


Processing epoch 01:  19%|█▉        | 919/4850 [04:22<19:02,  3.44it/s]

Epoch: 2, Loss: 2.7338995933532715


Processing epoch 01:  19%|█▉        | 920/4850 [04:22<19:00,  3.45it/s]

Epoch: 2, Loss: 2.4790759086608887


Processing epoch 01:  19%|█▉        | 921/4850 [04:23<18:51,  3.47it/s]

Epoch: 2, Loss: 2.195347309112549


Processing epoch 01:  19%|█▉        | 922/4850 [04:23<18:39,  3.51it/s]

Epoch: 2, Loss: 2.133558750152588


Processing epoch 01:  19%|█▉        | 923/4850 [04:23<18:36,  3.52it/s]

Epoch: 2, Loss: 2.4540488719940186


Processing epoch 01:  19%|█▉        | 924/4850 [04:24<18:34,  3.52it/s]

Epoch: 2, Loss: 2.7014429569244385


Processing epoch 01:  19%|█▉        | 925/4850 [04:24<18:33,  3.53it/s]

Epoch: 2, Loss: 2.848649024963379


Processing epoch 01:  19%|█▉        | 926/4850 [04:24<18:33,  3.52it/s]

Epoch: 2, Loss: 2.6570186614990234


Processing epoch 01:  19%|█▉        | 927/4850 [04:24<18:25,  3.55it/s]

Epoch: 2, Loss: 2.6626529693603516


Processing epoch 01:  19%|█▉        | 928/4850 [04:25<18:27,  3.54it/s]

Epoch: 2, Loss: 2.9579198360443115


Processing epoch 01:  19%|█▉        | 929/4850 [04:25<18:22,  3.56it/s]

Epoch: 2, Loss: 3.734548330307007


Processing epoch 01:  19%|█▉        | 930/4850 [04:25<18:18,  3.57it/s]

Epoch: 2, Loss: 2.9664998054504395


Processing epoch 01:  19%|█▉        | 931/4850 [04:26<18:34,  3.52it/s]

Epoch: 2, Loss: 1.996737003326416


Processing epoch 01:  19%|█▉        | 932/4850 [04:26<18:29,  3.53it/s]

Epoch: 2, Loss: 2.160033702850342


Processing epoch 01:  19%|█▉        | 933/4850 [04:26<18:24,  3.55it/s]

Epoch: 2, Loss: 2.7721195220947266


Processing epoch 01:  19%|█▉        | 934/4850 [04:26<18:24,  3.54it/s]

Epoch: 2, Loss: 3.2004213333129883


Processing epoch 01:  19%|█▉        | 935/4850 [04:27<18:28,  3.53it/s]

Epoch: 2, Loss: 1.9310884475708008


Processing epoch 01:  19%|█▉        | 936/4850 [04:27<18:24,  3.54it/s]

Epoch: 2, Loss: 2.5664334297180176


Processing epoch 01:  19%|█▉        | 937/4850 [04:27<18:21,  3.55it/s]

Epoch: 2, Loss: 3.416172742843628


Processing epoch 01:  19%|█▉        | 938/4850 [04:28<18:20,  3.55it/s]

Epoch: 2, Loss: 2.8077385425567627


Processing epoch 01:  19%|█▉        | 939/4850 [04:28<18:23,  3.55it/s]

Epoch: 2, Loss: 2.8654017448425293


Processing epoch 01:  19%|█▉        | 940/4850 [04:28<18:22,  3.55it/s]

Epoch: 2, Loss: 2.199281692504883


Processing epoch 01:  19%|█▉        | 941/4850 [04:28<18:17,  3.56it/s]

Epoch: 2, Loss: 2.6969146728515625


Processing epoch 01:  19%|█▉        | 942/4850 [04:29<18:35,  3.50it/s]

Epoch: 2, Loss: 2.234130859375


Processing epoch 01:  19%|█▉        | 943/4850 [04:29<18:35,  3.50it/s]

Epoch: 2, Loss: 3.2860608100891113


Processing epoch 01:  19%|█▉        | 944/4850 [04:29<18:25,  3.53it/s]

Epoch: 2, Loss: 2.6455225944519043


Processing epoch 01:  19%|█▉        | 945/4850 [04:30<18:19,  3.55it/s]

Epoch: 2, Loss: 3.037318229675293


Processing epoch 01:  20%|█▉        | 946/4850 [04:30<18:20,  3.55it/s]

Epoch: 2, Loss: 3.155433177947998


Processing epoch 01:  20%|█▉        | 947/4850 [04:30<18:27,  3.52it/s]

Epoch: 2, Loss: 2.763883590698242


Processing epoch 01:  20%|█▉        | 948/4850 [04:30<18:28,  3.52it/s]

Epoch: 2, Loss: 2.537235736846924


Processing epoch 01:  20%|█▉        | 949/4850 [04:31<18:51,  3.45it/s]

Epoch: 2, Loss: 2.5449159145355225


Processing epoch 01:  20%|█▉        | 950/4850 [04:31<19:14,  3.38it/s]

Epoch: 2, Loss: 3.166839599609375


Processing epoch 01:  20%|█▉        | 951/4850 [04:31<19:09,  3.39it/s]

Epoch: 2, Loss: 2.208169937133789


Processing epoch 01:  20%|█▉        | 952/4850 [04:32<19:00,  3.42it/s]

Epoch: 2, Loss: 2.455606460571289


Processing epoch 01:  20%|█▉        | 953/4850 [04:32<19:15,  3.37it/s]

Epoch: 2, Loss: 2.7084474563598633


Processing epoch 01:  20%|█▉        | 954/4850 [04:32<19:14,  3.37it/s]

Epoch: 2, Loss: 2.5424976348876953


Processing epoch 01:  20%|█▉        | 955/4850 [04:32<19:18,  3.36it/s]

Epoch: 2, Loss: 2.6328837871551514


Processing epoch 01:  20%|█▉        | 956/4850 [04:33<19:15,  3.37it/s]

Epoch: 2, Loss: 2.5383734703063965


Processing epoch 01:  20%|█▉        | 957/4850 [04:33<19:26,  3.34it/s]

Epoch: 2, Loss: 2.6981863975524902


Processing epoch 01:  20%|█▉        | 958/4850 [04:33<19:29,  3.33it/s]

Epoch: 2, Loss: 2.4914703369140625


Processing epoch 01:  20%|█▉        | 959/4850 [04:34<19:19,  3.36it/s]

Epoch: 2, Loss: 2.459714651107788


Processing epoch 01:  20%|█▉        | 960/4850 [04:34<19:15,  3.37it/s]

Epoch: 2, Loss: 3.1945536136627197


Processing epoch 01:  20%|█▉        | 961/4850 [04:34<19:31,  3.32it/s]

Epoch: 2, Loss: 2.308825731277466


Processing epoch 01:  20%|█▉        | 962/4850 [04:35<19:08,  3.38it/s]

Epoch: 2, Loss: 2.9550116062164307


Processing epoch 01:  20%|█▉        | 963/4850 [04:35<18:47,  3.45it/s]

Epoch: 2, Loss: 2.6603212356567383


Processing epoch 01:  20%|█▉        | 964/4850 [04:35<18:30,  3.50it/s]

Epoch: 2, Loss: 4.293450832366943


Processing epoch 01:  20%|█▉        | 965/4850 [04:35<18:21,  3.53it/s]

Epoch: 2, Loss: 2.7442612648010254


Processing epoch 01:  20%|█▉        | 966/4850 [04:36<18:14,  3.55it/s]

Epoch: 2, Loss: 3.041503429412842


Processing epoch 01:  20%|█▉        | 967/4850 [04:36<18:11,  3.56it/s]

Epoch: 2, Loss: 3.746973991394043


Processing epoch 01:  20%|█▉        | 968/4850 [04:36<18:13,  3.55it/s]

Epoch: 2, Loss: 2.5297770500183105


Processing epoch 01:  20%|█▉        | 969/4850 [04:37<18:08,  3.57it/s]

Epoch: 2, Loss: 2.927741050720215


Processing epoch 01:  20%|██        | 970/4850 [04:37<18:12,  3.55it/s]

Epoch: 2, Loss: 2.745553731918335


Processing epoch 01:  20%|██        | 971/4850 [04:37<18:14,  3.54it/s]

Epoch: 2, Loss: 2.8740880489349365


Processing epoch 01:  20%|██        | 972/4850 [04:37<18:11,  3.55it/s]

Epoch: 2, Loss: 3.2167506217956543


Processing epoch 01:  20%|██        | 973/4850 [04:38<18:11,  3.55it/s]

Epoch: 2, Loss: 2.358520030975342


Processing epoch 01:  20%|██        | 974/4850 [04:38<18:05,  3.57it/s]

Epoch: 2, Loss: 3.842534065246582


Processing epoch 01:  20%|██        | 975/4850 [04:38<18:09,  3.56it/s]

Epoch: 2, Loss: 2.468400716781616


Processing epoch 01:  20%|██        | 976/4850 [04:38<18:08,  3.56it/s]

Epoch: 2, Loss: 2.518869161605835


Processing epoch 01:  20%|██        | 977/4850 [04:39<18:08,  3.56it/s]

Epoch: 2, Loss: 3.2498583793640137


Processing epoch 01:  20%|██        | 978/4850 [04:39<18:09,  3.56it/s]

Epoch: 2, Loss: 2.2713370323181152


Processing epoch 01:  20%|██        | 979/4850 [04:39<18:08,  3.56it/s]

Epoch: 2, Loss: 2.952765941619873


Processing epoch 01:  20%|██        | 980/4850 [04:40<18:10,  3.55it/s]

Epoch: 2, Loss: 2.4715733528137207


Processing epoch 01:  20%|██        | 981/4850 [04:40<18:08,  3.55it/s]

Epoch: 2, Loss: 2.8841700553894043


Processing epoch 01:  20%|██        | 982/4850 [04:40<18:03,  3.57it/s]

Epoch: 2, Loss: 4.296380996704102


Processing epoch 01:  20%|██        | 983/4850 [04:40<18:00,  3.58it/s]

Epoch: 2, Loss: 2.7725110054016113


Processing epoch 01:  20%|██        | 984/4850 [04:41<18:03,  3.57it/s]

Epoch: 2, Loss: 2.8087844848632812


Processing epoch 01:  20%|██        | 985/4850 [04:41<18:03,  3.57it/s]

Epoch: 2, Loss: 2.9103612899780273


Processing epoch 01:  20%|██        | 986/4850 [04:41<18:08,  3.55it/s]

Epoch: 2, Loss: 3.050149917602539


Processing epoch 01:  20%|██        | 987/4850 [04:42<18:07,  3.55it/s]

Epoch: 2, Loss: 2.686124086380005


Processing epoch 01:  20%|██        | 988/4850 [04:42<18:11,  3.54it/s]

Epoch: 2, Loss: 2.659885883331299


Processing epoch 01:  20%|██        | 989/4850 [04:42<18:04,  3.56it/s]

Epoch: 2, Loss: 2.6949479579925537


Processing epoch 01:  20%|██        | 990/4850 [04:42<18:07,  3.55it/s]

Epoch: 2, Loss: 2.3858017921447754


Processing epoch 01:  20%|██        | 991/4850 [04:43<18:07,  3.55it/s]

Epoch: 2, Loss: 2.824694871902466


Processing epoch 01:  20%|██        | 992/4850 [04:43<18:06,  3.55it/s]

Epoch: 2, Loss: 2.666787624359131


Processing epoch 01:  20%|██        | 993/4850 [04:43<18:06,  3.55it/s]

Epoch: 2, Loss: 2.7472786903381348


Processing epoch 01:  20%|██        | 994/4850 [04:44<18:06,  3.55it/s]

Epoch: 2, Loss: 1.8771367073059082


Processing epoch 01:  21%|██        | 995/4850 [04:44<18:09,  3.54it/s]

Epoch: 2, Loss: 2.6943352222442627


Processing epoch 01:  21%|██        | 996/4850 [04:44<18:06,  3.55it/s]

Epoch: 2, Loss: 3.1312317848205566


Processing epoch 01:  21%|██        | 997/4850 [04:44<18:28,  3.48it/s]

Epoch: 2, Loss: 2.9581642150878906


Processing epoch 01:  21%|██        | 998/4850 [04:45<18:47,  3.41it/s]

Epoch: 2, Loss: 2.373380661010742


Processing epoch 01:  21%|██        | 999/4850 [04:45<19:01,  3.37it/s]

Epoch: 2, Loss: 2.828896999359131


Processing epoch 01:  21%|██        | 1000/4850 [04:45<19:21,  3.31it/s]

Epoch: 2, Loss: 2.9147956371307373


Processing epoch 01:  21%|██        | 1001/4850 [04:46<19:17,  3.33it/s]

Epoch: 2, Loss: 2.343662738800049


Processing epoch 01:  21%|██        | 1002/4850 [04:46<19:15,  3.33it/s]

Epoch: 2, Loss: 2.5065832138061523


Processing epoch 01:  21%|██        | 1003/4850 [04:46<18:58,  3.38it/s]

Epoch: 2, Loss: 2.461569309234619


Processing epoch 01:  21%|██        | 1004/4850 [04:47<19:02,  3.37it/s]

Epoch: 2, Loss: 2.498721122741699


Processing epoch 01:  21%|██        | 1005/4850 [04:47<19:03,  3.36it/s]

Epoch: 2, Loss: 2.8457937240600586


Processing epoch 01:  21%|██        | 1006/4850 [04:47<19:11,  3.34it/s]

Epoch: 2, Loss: 3.2849466800689697


Processing epoch 01:  21%|██        | 1007/4850 [04:47<19:02,  3.36it/s]

Epoch: 2, Loss: 3.16400146484375


Processing epoch 01:  21%|██        | 1008/4850 [04:48<19:02,  3.36it/s]

Epoch: 2, Loss: 2.715153455734253


Processing epoch 01:  21%|██        | 1009/4850 [04:48<19:04,  3.36it/s]

Epoch: 2, Loss: 2.7600858211517334


Processing epoch 01:  21%|██        | 1010/4850 [04:48<19:20,  3.31it/s]

Epoch: 2, Loss: 3.3350319862365723


Processing epoch 01:  21%|██        | 1011/4850 [04:49<19:05,  3.35it/s]

Epoch: 2, Loss: 2.330533027648926


Processing epoch 01:  21%|██        | 1012/4850 [04:49<18:44,  3.41it/s]

Epoch: 2, Loss: 2.9927353858947754


Processing epoch 01:  21%|██        | 1013/4850 [04:49<18:34,  3.44it/s]

Epoch: 2, Loss: 2.5476953983306885


Processing epoch 01:  21%|██        | 1014/4850 [04:49<18:25,  3.47it/s]

Epoch: 2, Loss: 2.6088500022888184


Processing epoch 01:  21%|██        | 1015/4850 [04:50<18:23,  3.48it/s]

Epoch: 2, Loss: 2.2338504791259766


Processing epoch 01:  21%|██        | 1016/4850 [04:50<18:14,  3.50it/s]

Epoch: 2, Loss: 2.578374147415161


Processing epoch 01:  21%|██        | 1017/4850 [04:50<18:06,  3.53it/s]

Epoch: 2, Loss: 2.8631107807159424


Processing epoch 01:  21%|██        | 1018/4850 [04:51<18:01,  3.54it/s]

Epoch: 2, Loss: 3.6707334518432617


Processing epoch 01:  21%|██        | 1019/4850 [04:51<18:05,  3.53it/s]

Epoch: 2, Loss: 3.1377224922180176


Processing epoch 01:  21%|██        | 1020/4850 [04:51<17:58,  3.55it/s]

Epoch: 2, Loss: 2.4920332431793213


Processing epoch 01:  21%|██        | 1021/4850 [04:51<17:56,  3.56it/s]

Epoch: 2, Loss: 2.3525643348693848


Processing epoch 01:  21%|██        | 1022/4850 [04:52<18:02,  3.54it/s]

Epoch: 2, Loss: 3.033491611480713


Processing epoch 01:  21%|██        | 1023/4850 [04:52<18:01,  3.54it/s]

Epoch: 2, Loss: 2.5727405548095703


Processing epoch 01:  21%|██        | 1024/4850 [04:52<18:01,  3.54it/s]

Epoch: 2, Loss: 2.7001214027404785


Processing epoch 01:  21%|██        | 1025/4850 [04:53<17:58,  3.55it/s]

Epoch: 2, Loss: 2.198298454284668


Processing epoch 01:  21%|██        | 1026/4850 [04:53<17:57,  3.55it/s]

Epoch: 2, Loss: 2.356065273284912


Processing epoch 01:  21%|██        | 1027/4850 [04:53<17:57,  3.55it/s]

Epoch: 2, Loss: 2.3424315452575684


Processing epoch 01:  21%|██        | 1028/4850 [04:53<17:59,  3.54it/s]

Epoch: 2, Loss: 2.1868534088134766


Processing epoch 01:  21%|██        | 1029/4850 [04:54<18:06,  3.52it/s]

Epoch: 2, Loss: 2.205045700073242


Processing epoch 01:  21%|██        | 1030/4850 [04:54<18:06,  3.52it/s]

Epoch: 2, Loss: 2.1518821716308594


Processing epoch 01:  21%|██▏       | 1031/4850 [04:54<18:05,  3.52it/s]

Epoch: 2, Loss: 2.7788543701171875


Processing epoch 01:  21%|██▏       | 1032/4850 [04:55<18:01,  3.53it/s]

Epoch: 2, Loss: 2.927766799926758


Processing epoch 01:  21%|██▏       | 1033/4850 [04:55<18:02,  3.53it/s]

Epoch: 2, Loss: 2.8180713653564453


Processing epoch 01:  21%|██▏       | 1034/4850 [04:55<17:56,  3.54it/s]

Epoch: 2, Loss: 2.4316787719726562


Processing epoch 01:  21%|██▏       | 1035/4850 [04:55<17:55,  3.55it/s]

Epoch: 2, Loss: 2.0868723392486572


Processing epoch 01:  21%|██▏       | 1036/4850 [04:56<17:47,  3.57it/s]

Epoch: 2, Loss: 2.5715060234069824


Processing epoch 01:  21%|██▏       | 1037/4850 [04:56<17:45,  3.58it/s]

Epoch: 2, Loss: 3.846226215362549


Processing epoch 01:  21%|██▏       | 1038/4850 [04:56<17:49,  3.56it/s]

Epoch: 2, Loss: 2.0581002235412598


Processing epoch 01:  21%|██▏       | 1039/4850 [04:57<17:44,  3.58it/s]

Epoch: 2, Loss: 2.36702823638916


Processing epoch 01:  21%|██▏       | 1040/4850 [04:57<18:00,  3.52it/s]

Epoch: 2, Loss: 2.935703992843628


Processing epoch 01:  21%|██▏       | 1041/4850 [04:57<18:06,  3.50it/s]

Epoch: 2, Loss: 2.1266236305236816


Processing epoch 01:  21%|██▏       | 1042/4850 [04:57<18:00,  3.52it/s]

Epoch: 2, Loss: 3.1832380294799805


Processing epoch 01:  22%|██▏       | 1043/4850 [04:58<17:59,  3.53it/s]

Epoch: 2, Loss: 2.7433838844299316


Processing epoch 01:  22%|██▏       | 1044/4850 [04:58<17:57,  3.53it/s]

Epoch: 2, Loss: 2.6984972953796387


Processing epoch 01:  22%|██▏       | 1045/4850 [04:58<17:54,  3.54it/s]

Epoch: 2, Loss: 2.618828296661377


Processing epoch 01:  22%|██▏       | 1046/4850 [04:59<18:05,  3.50it/s]

Epoch: 2, Loss: 2.5638632774353027


Processing epoch 01:  22%|██▏       | 1047/4850 [04:59<18:31,  3.42it/s]

Epoch: 2, Loss: 2.3076858520507812


Processing epoch 01:  22%|██▏       | 1048/4850 [04:59<18:30,  3.42it/s]

Epoch: 2, Loss: 2.4815688133239746


Processing epoch 01:  22%|██▏       | 1049/4850 [04:59<18:51,  3.36it/s]

Epoch: 2, Loss: 2.3310070037841797


Processing epoch 01:  22%|██▏       | 1050/4850 [05:00<18:38,  3.40it/s]

Epoch: 2, Loss: 2.511669158935547


Processing epoch 01:  22%|██▏       | 1051/4850 [05:00<18:37,  3.40it/s]

Epoch: 2, Loss: 2.6886324882507324


Processing epoch 01:  22%|██▏       | 1052/4850 [05:00<18:42,  3.38it/s]

Epoch: 2, Loss: 2.7935688495635986


Processing epoch 01:  22%|██▏       | 1053/4850 [05:01<19:13,  3.29it/s]

Epoch: 2, Loss: 2.8057353496551514


Processing epoch 01:  22%|██▏       | 1054/4850 [05:01<18:55,  3.34it/s]

Epoch: 2, Loss: 2.447457790374756


Processing epoch 01:  22%|██▏       | 1055/4850 [05:01<19:06,  3.31it/s]

Epoch: 2, Loss: 2.27107310295105


Processing epoch 01:  22%|██▏       | 1056/4850 [05:02<18:53,  3.35it/s]

Epoch: 2, Loss: 2.4965662956237793


Processing epoch 01:  22%|██▏       | 1057/4850 [05:02<18:43,  3.38it/s]

Epoch: 2, Loss: 2.9285335540771484


Processing epoch 01:  22%|██▏       | 1058/4850 [05:02<18:45,  3.37it/s]

Epoch: 2, Loss: 2.6619019508361816


Processing epoch 01:  22%|██▏       | 1059/4850 [05:02<18:52,  3.35it/s]

Epoch: 2, Loss: 3.055903434753418


Processing epoch 01:  22%|██▏       | 1060/4850 [05:03<18:34,  3.40it/s]

Epoch: 2, Loss: 2.227199077606201


Processing epoch 01:  22%|██▏       | 1061/4850 [05:03<18:17,  3.45it/s]

Epoch: 2, Loss: 3.0146937370300293


Processing epoch 01:  22%|██▏       | 1062/4850 [05:03<18:04,  3.49it/s]

Epoch: 2, Loss: 2.410991668701172


Processing epoch 01:  22%|██▏       | 1063/4850 [05:04<17:55,  3.52it/s]

Epoch: 2, Loss: 2.5819780826568604


Processing epoch 01:  22%|██▏       | 1064/4850 [05:04<17:51,  3.53it/s]

Epoch: 2, Loss: 2.373772144317627


Processing epoch 01:  22%|██▏       | 1065/4850 [05:04<17:46,  3.55it/s]

Epoch: 2, Loss: 3.000615358352661


Processing epoch 01:  22%|██▏       | 1066/4850 [05:04<17:45,  3.55it/s]

Epoch: 2, Loss: 2.5833852291107178


Processing epoch 01:  22%|██▏       | 1067/4850 [05:05<17:44,  3.55it/s]

Epoch: 2, Loss: 2.6747045516967773


Processing epoch 01:  22%|██▏       | 1068/4850 [05:05<17:44,  3.55it/s]

Epoch: 2, Loss: 3.0415804386138916


Processing epoch 01:  22%|██▏       | 1069/4850 [05:05<17:54,  3.52it/s]

Epoch: 2, Loss: 2.763779878616333


Processing epoch 01:  22%|██▏       | 1070/4850 [05:06<17:51,  3.53it/s]

Epoch: 2, Loss: 3.034268379211426


Processing epoch 01:  22%|██▏       | 1071/4850 [05:06<17:46,  3.54it/s]

Epoch: 2, Loss: 3.2291860580444336


Processing epoch 01:  22%|██▏       | 1072/4850 [05:06<17:43,  3.55it/s]

Epoch: 2, Loss: 2.994239091873169


Processing epoch 01:  22%|██▏       | 1073/4850 [05:06<17:47,  3.54it/s]

Epoch: 2, Loss: 2.2959446907043457


Processing epoch 01:  22%|██▏       | 1074/4850 [05:07<17:46,  3.54it/s]

Epoch: 2, Loss: 2.7648987770080566


Processing epoch 01:  22%|██▏       | 1075/4850 [05:07<17:40,  3.56it/s]

Epoch: 2, Loss: 3.7494077682495117


Processing epoch 01:  22%|██▏       | 1076/4850 [05:07<17:44,  3.54it/s]

Epoch: 2, Loss: 2.242885112762451


Processing epoch 01:  22%|██▏       | 1077/4850 [05:07<17:42,  3.55it/s]

Epoch: 2, Loss: 2.649261474609375


Processing epoch 01:  22%|██▏       | 1078/4850 [05:08<17:43,  3.55it/s]

Epoch: 2, Loss: 2.1132681369781494


Processing epoch 01:  22%|██▏       | 1079/4850 [05:08<17:42,  3.55it/s]

Epoch: 2, Loss: 2.345367670059204


Processing epoch 01:  22%|██▏       | 1080/4850 [05:08<17:39,  3.56it/s]

Epoch: 2, Loss: 2.6107282638549805


Processing epoch 01:  22%|██▏       | 1081/4850 [05:09<17:38,  3.56it/s]

Epoch: 2, Loss: 3.038043975830078


Processing epoch 01:  22%|██▏       | 1082/4850 [05:09<17:37,  3.56it/s]

Epoch: 2, Loss: 2.518700122833252


Processing epoch 01:  22%|██▏       | 1083/4850 [05:09<17:34,  3.57it/s]

Epoch: 2, Loss: 3.2609188556671143


Processing epoch 01:  22%|██▏       | 1084/4850 [05:09<17:33,  3.58it/s]

Epoch: 2, Loss: 2.5964150428771973


Processing epoch 01:  22%|██▏       | 1085/4850 [05:10<17:29,  3.59it/s]

Epoch: 2, Loss: 4.492458820343018


Processing epoch 01:  22%|██▏       | 1086/4850 [05:10<17:29,  3.59it/s]

Epoch: 2, Loss: 3.0798892974853516


Processing epoch 01:  22%|██▏       | 1087/4850 [05:10<17:36,  3.56it/s]

Epoch: 2, Loss: 2.831796407699585


Processing epoch 01:  22%|██▏       | 1088/4850 [05:11<17:40,  3.55it/s]

Epoch: 2, Loss: 2.6035332679748535


Processing epoch 01:  22%|██▏       | 1089/4850 [05:11<17:43,  3.54it/s]

Epoch: 2, Loss: 2.7125682830810547


Processing epoch 01:  22%|██▏       | 1090/4850 [05:11<17:42,  3.54it/s]

Epoch: 2, Loss: 2.7605738639831543


Processing epoch 01:  22%|██▏       | 1091/4850 [05:11<17:41,  3.54it/s]

Epoch: 2, Loss: 2.3021163940429688


Processing epoch 01:  23%|██▎       | 1092/4850 [05:12<17:41,  3.54it/s]

Epoch: 2, Loss: 2.2870068550109863


Processing epoch 01:  23%|██▎       | 1093/4850 [05:12<17:42,  3.54it/s]

Epoch: 2, Loss: 2.7174770832061768


Processing epoch 01:  23%|██▎       | 1094/4850 [05:12<17:42,  3.53it/s]

Epoch: 2, Loss: 2.943366527557373


Processing epoch 01:  23%|██▎       | 1095/4850 [05:13<17:44,  3.53it/s]

Epoch: 2, Loss: 2.8489127159118652


Processing epoch 01:  23%|██▎       | 1096/4850 [05:13<17:59,  3.48it/s]

Epoch: 2, Loss: 2.3359413146972656


Processing epoch 01:  23%|██▎       | 1097/4850 [05:13<18:09,  3.44it/s]

Epoch: 2, Loss: 2.384321451187134


Processing epoch 01:  23%|██▎       | 1098/4850 [05:13<18:13,  3.43it/s]

Epoch: 2, Loss: 2.705932855606079


Processing epoch 01:  23%|██▎       | 1099/4850 [05:14<18:07,  3.45it/s]

Epoch: 2, Loss: 2.6670708656311035


Processing epoch 01:  23%|██▎       | 1100/4850 [05:14<18:02,  3.46it/s]

Epoch: 2, Loss: 3.4760451316833496


Processing epoch 01:  23%|██▎       | 1101/4850 [05:14<18:19,  3.41it/s]

Epoch: 2, Loss: 3.271033763885498


Processing epoch 01:  23%|██▎       | 1102/4850 [05:15<18:05,  3.45it/s]

Epoch: 2, Loss: 2.5256316661834717


Processing epoch 01:  23%|██▎       | 1103/4850 [05:15<17:59,  3.47it/s]

Epoch: 2, Loss: 2.807375431060791


Processing epoch 01:  23%|██▎       | 1104/4850 [05:15<18:24,  3.39it/s]

Epoch: 2, Loss: 3.007086753845215


Processing epoch 01:  23%|██▎       | 1105/4850 [05:15<18:27,  3.38it/s]

Epoch: 2, Loss: 2.898439407348633


Processing epoch 01:  23%|██▎       | 1106/4850 [05:16<18:26,  3.38it/s]

Epoch: 2, Loss: 2.4024760723114014


Processing epoch 01:  23%|██▎       | 1107/4850 [05:16<18:09,  3.44it/s]

Epoch: 2, Loss: 3.900200843811035


Processing epoch 01:  23%|██▎       | 1108/4850 [05:16<18:03,  3.45it/s]

Epoch: 2, Loss: 2.2984166145324707


Processing epoch 01:  23%|██▎       | 1109/4850 [05:17<18:13,  3.42it/s]

Epoch: 2, Loss: 2.5863959789276123


Processing epoch 01:  23%|██▎       | 1110/4850 [05:17<18:30,  3.37it/s]

Epoch: 2, Loss: 2.051239013671875


Processing epoch 01:  23%|██▎       | 1111/4850 [05:17<18:13,  3.42it/s]

Epoch: 2, Loss: 3.323092460632324


Processing epoch 01:  23%|██▎       | 1112/4850 [05:18<17:59,  3.46it/s]

Epoch: 2, Loss: 2.716263771057129


Processing epoch 01:  23%|██▎       | 1113/4850 [05:18<17:49,  3.49it/s]

Epoch: 2, Loss: 2.1091649532318115


Processing epoch 01:  23%|██▎       | 1114/4850 [05:18<17:40,  3.52it/s]

Epoch: 2, Loss: 2.7724337577819824


Processing epoch 01:  23%|██▎       | 1115/4850 [05:18<17:34,  3.54it/s]

Epoch: 2, Loss: 2.8171417713165283


Processing epoch 01:  23%|██▎       | 1116/4850 [05:19<17:36,  3.53it/s]

Epoch: 2, Loss: 3.079648494720459


Processing epoch 01:  23%|██▎       | 1117/4850 [05:19<17:33,  3.54it/s]

Epoch: 2, Loss: 2.8724398612976074


Processing epoch 01:  23%|██▎       | 1118/4850 [05:19<17:32,  3.55it/s]

Epoch: 2, Loss: 4.214639663696289


Processing epoch 01:  23%|██▎       | 1119/4850 [05:19<17:32,  3.54it/s]

Epoch: 2, Loss: 2.6711483001708984


Processing epoch 01:  23%|██▎       | 1120/4850 [05:20<17:36,  3.53it/s]

Epoch: 2, Loss: 2.7053849697113037


Processing epoch 01:  23%|██▎       | 1121/4850 [05:20<17:31,  3.54it/s]

Epoch: 2, Loss: 2.270355463027954


Processing epoch 01:  23%|██▎       | 1122/4850 [05:20<17:32,  3.54it/s]

Epoch: 2, Loss: 2.457268714904785


Processing epoch 01:  23%|██▎       | 1123/4850 [05:21<17:29,  3.55it/s]

Epoch: 2, Loss: 2.177095651626587


Processing epoch 01:  23%|██▎       | 1124/4850 [05:21<17:28,  3.55it/s]

Epoch: 2, Loss: 2.537710428237915


Processing epoch 01:  23%|██▎       | 1125/4850 [05:21<17:31,  3.54it/s]

Epoch: 2, Loss: 3.1321682929992676


Processing epoch 01:  23%|██▎       | 1126/4850 [05:21<17:28,  3.55it/s]

Epoch: 2, Loss: 3.235283136367798


Processing epoch 01:  23%|██▎       | 1127/4850 [05:22<17:34,  3.53it/s]

Epoch: 2, Loss: 2.8047685623168945


Processing epoch 01:  23%|██▎       | 1128/4850 [05:22<17:29,  3.55it/s]

Epoch: 2, Loss: 2.6959352493286133


Processing epoch 01:  23%|██▎       | 1129/4850 [05:22<17:33,  3.53it/s]

Epoch: 2, Loss: 3.0695252418518066


Processing epoch 01:  23%|██▎       | 1130/4850 [05:23<17:27,  3.55it/s]

Epoch: 2, Loss: 3.5653092861175537


Processing epoch 01:  23%|██▎       | 1131/4850 [05:23<17:32,  3.53it/s]

Epoch: 2, Loss: 2.3950448036193848


Processing epoch 01:  23%|██▎       | 1132/4850 [05:23<17:28,  3.54it/s]

Epoch: 2, Loss: 2.5902490615844727


Processing epoch 01:  23%|██▎       | 1133/4850 [05:23<17:26,  3.55it/s]

Epoch: 2, Loss: 2.6947736740112305


Processing epoch 01:  23%|██▎       | 1134/4850 [05:24<17:25,  3.55it/s]

Epoch: 2, Loss: 2.4862639904022217


Processing epoch 01:  23%|██▎       | 1135/4850 [05:24<17:28,  3.54it/s]

Epoch: 2, Loss: 2.682872772216797


Processing epoch 01:  23%|██▎       | 1136/4850 [05:24<17:29,  3.54it/s]

Epoch: 2, Loss: 3.3087692260742188


Processing epoch 01:  23%|██▎       | 1137/4850 [05:25<17:30,  3.53it/s]

Epoch: 2, Loss: 2.928713798522949


Processing epoch 01:  23%|██▎       | 1138/4850 [05:25<17:27,  3.54it/s]

Epoch: 2, Loss: 3.0349974632263184


Processing epoch 01:  23%|██▎       | 1139/4850 [05:25<17:33,  3.52it/s]

Epoch: 2, Loss: 2.3653225898742676


Processing epoch 01:  24%|██▎       | 1140/4850 [05:25<17:31,  3.53it/s]

Epoch: 2, Loss: 2.9457201957702637


Processing epoch 01:  24%|██▎       | 1141/4850 [05:26<17:28,  3.54it/s]

Epoch: 2, Loss: 3.3616905212402344


Processing epoch 01:  24%|██▎       | 1142/4850 [05:26<17:27,  3.54it/s]

Epoch: 2, Loss: 3.644906997680664


Processing epoch 01:  24%|██▎       | 1143/4850 [05:26<17:24,  3.55it/s]

Epoch: 2, Loss: 1.89822256565094


Processing epoch 01:  24%|██▎       | 1144/4850 [05:27<17:25,  3.55it/s]

Epoch: 2, Loss: 2.4496164321899414


Processing epoch 01:  24%|██▎       | 1145/4850 [05:27<17:25,  3.54it/s]

Epoch: 2, Loss: 2.536120653152466


Processing epoch 01:  24%|██▎       | 1146/4850 [05:27<17:51,  3.46it/s]

Epoch: 2, Loss: 2.7087130546569824


Processing epoch 01:  24%|██▎       | 1147/4850 [05:27<17:55,  3.44it/s]

Epoch: 2, Loss: 2.5448403358459473


Processing epoch 01:  24%|██▎       | 1148/4850 [05:28<18:05,  3.41it/s]

Epoch: 2, Loss: 3.1455740928649902


Processing epoch 01:  24%|██▎       | 1149/4850 [05:28<18:16,  3.37it/s]

Epoch: 2, Loss: 2.1810073852539062


Processing epoch 01:  24%|██▎       | 1150/4850 [05:28<18:08,  3.40it/s]

Epoch: 2, Loss: 2.8564608097076416


Processing epoch 01:  24%|██▎       | 1151/4850 [05:29<17:51,  3.45it/s]

Epoch: 2, Loss: 3.7123546600341797


Processing epoch 01:  24%|██▍       | 1152/4850 [05:29<17:46,  3.47it/s]

Epoch: 2, Loss: 2.570736885070801


Processing epoch 01:  24%|██▍       | 1153/4850 [05:29<18:03,  3.41it/s]

Epoch: 2, Loss: 2.792987108230591


Processing epoch 01:  24%|██▍       | 1154/4850 [05:29<18:05,  3.41it/s]

Epoch: 2, Loss: 2.390063524246216


Processing epoch 01:  24%|██▍       | 1155/4850 [05:30<18:13,  3.38it/s]

Epoch: 2, Loss: 3.0438485145568848


Processing epoch 01:  24%|██▍       | 1156/4850 [05:30<18:10,  3.39it/s]

Epoch: 2, Loss: 2.177055835723877


Processing epoch 01:  24%|██▍       | 1157/4850 [05:30<18:10,  3.39it/s]

Epoch: 2, Loss: 2.5615015029907227


Processing epoch 01:  24%|██▍       | 1158/4850 [05:31<17:55,  3.43it/s]

Epoch: 2, Loss: 2.77933406829834


Processing epoch 01:  24%|██▍       | 1159/4850 [05:31<18:10,  3.38it/s]

Epoch: 2, Loss: 3.0260448455810547


Processing epoch 01:  24%|██▍       | 1160/4850 [05:31<18:17,  3.36it/s]

Epoch: 2, Loss: 2.779757261276245


Processing epoch 01:  24%|██▍       | 1161/4850 [05:32<17:59,  3.42it/s]

Epoch: 2, Loss: 2.3954148292541504


Processing epoch 01:  24%|██▍       | 1162/4850 [05:32<17:46,  3.46it/s]

Epoch: 2, Loss: 2.734015703201294


Processing epoch 01:  24%|██▍       | 1163/4850 [05:32<17:32,  3.50it/s]

Epoch: 2, Loss: 2.8140172958374023


Processing epoch 01:  24%|██▍       | 1164/4850 [05:32<17:30,  3.51it/s]

Epoch: 2, Loss: 2.8133630752563477


Processing epoch 01:  24%|██▍       | 1165/4850 [05:33<17:29,  3.51it/s]

Epoch: 2, Loss: 2.790759325027466


Processing epoch 01:  24%|██▍       | 1166/4850 [05:33<17:25,  3.52it/s]

Epoch: 2, Loss: 3.6858339309692383


Processing epoch 01:  24%|██▍       | 1167/4850 [05:33<17:29,  3.51it/s]

Epoch: 2, Loss: 3.1112747192382812


Processing epoch 01:  24%|██▍       | 1168/4850 [05:34<17:30,  3.51it/s]

Epoch: 2, Loss: 2.067765951156616


Processing epoch 01:  24%|██▍       | 1169/4850 [05:34<17:21,  3.53it/s]

Epoch: 2, Loss: 2.3803906440734863


Processing epoch 01:  24%|██▍       | 1170/4850 [05:34<17:17,  3.55it/s]

Epoch: 2, Loss: 2.205615520477295


Processing epoch 01:  24%|██▍       | 1171/4850 [05:34<17:16,  3.55it/s]

Epoch: 2, Loss: 2.521028518676758


Processing epoch 01:  24%|██▍       | 1172/4850 [05:35<17:20,  3.54it/s]

Epoch: 2, Loss: 2.418307065963745


Processing epoch 01:  24%|██▍       | 1173/4850 [05:35<17:20,  3.53it/s]

Epoch: 2, Loss: 2.3263421058654785


Processing epoch 01:  24%|██▍       | 1174/4850 [05:35<17:17,  3.54it/s]

Epoch: 2, Loss: 2.8303840160369873


Processing epoch 01:  24%|██▍       | 1175/4850 [05:35<17:15,  3.55it/s]

Epoch: 2, Loss: 2.2196779251098633


Processing epoch 01:  24%|██▍       | 1176/4850 [05:36<17:13,  3.56it/s]

Epoch: 2, Loss: 2.5858402252197266


Processing epoch 01:  24%|██▍       | 1177/4850 [05:36<17:11,  3.56it/s]

Epoch: 2, Loss: 2.5339882373809814


Processing epoch 01:  24%|██▍       | 1178/4850 [05:36<17:13,  3.55it/s]

Epoch: 2, Loss: 2.760521411895752


Processing epoch 01:  24%|██▍       | 1179/4850 [05:37<17:14,  3.55it/s]

Epoch: 2, Loss: 2.713273286819458


Processing epoch 01:  24%|██▍       | 1180/4850 [05:37<17:11,  3.56it/s]

Epoch: 2, Loss: 2.4631969928741455


Processing epoch 01:  24%|██▍       | 1181/4850 [05:37<17:13,  3.55it/s]

Epoch: 2, Loss: 2.41404390335083


Processing epoch 01:  24%|██▍       | 1182/4850 [05:37<17:12,  3.55it/s]

Epoch: 2, Loss: 2.1967382431030273


Processing epoch 01:  24%|██▍       | 1183/4850 [05:38<17:09,  3.56it/s]

Epoch: 2, Loss: 2.3941097259521484


Processing epoch 01:  24%|██▍       | 1184/4850 [05:38<17:08,  3.56it/s]

Epoch: 2, Loss: 2.6458559036254883


Processing epoch 01:  24%|██▍       | 1185/4850 [05:38<17:05,  3.58it/s]

Epoch: 2, Loss: 2.438749313354492


Processing epoch 01:  24%|██▍       | 1186/4850 [05:39<17:07,  3.57it/s]

Epoch: 2, Loss: 3.2728700637817383


Processing epoch 01:  24%|██▍       | 1187/4850 [05:39<17:08,  3.56it/s]

Epoch: 2, Loss: 2.960653781890869


Processing epoch 01:  24%|██▍       | 1188/4850 [05:39<17:12,  3.55it/s]

Epoch: 2, Loss: 2.759948253631592


Processing epoch 01:  25%|██▍       | 1189/4850 [05:39<17:23,  3.51it/s]

Epoch: 2, Loss: 2.7468485832214355


Processing epoch 01:  25%|██▍       | 1190/4850 [05:40<17:18,  3.52it/s]

Epoch: 2, Loss: 2.5666208267211914


Processing epoch 01:  25%|██▍       | 1191/4850 [05:40<17:17,  3.53it/s]

Epoch: 2, Loss: 3.28110671043396


Processing epoch 01:  25%|██▍       | 1192/4850 [05:40<17:12,  3.54it/s]

Epoch: 2, Loss: 3.88472056388855


Processing epoch 01:  25%|██▍       | 1193/4850 [05:41<17:14,  3.54it/s]

Epoch: 2, Loss: 2.255554676055908


Processing epoch 01:  25%|██▍       | 1194/4850 [05:41<17:15,  3.53it/s]

Epoch: 2, Loss: 2.6828675270080566


Processing epoch 01:  25%|██▍       | 1195/4850 [05:41<17:12,  3.54it/s]

Epoch: 2, Loss: 2.8010830879211426


Processing epoch 01:  25%|██▍       | 1196/4850 [05:41<17:18,  3.52it/s]

Epoch: 2, Loss: 2.860858678817749


Processing epoch 01:  25%|██▍       | 1197/4850 [05:42<17:17,  3.52it/s]

Epoch: 2, Loss: 2.324289083480835


Processing epoch 01:  25%|██▍       | 1198/4850 [05:42<17:42,  3.44it/s]

Epoch: 2, Loss: 2.7194342613220215


Processing epoch 01:  25%|██▍       | 1199/4850 [05:42<17:30,  3.48it/s]

Epoch: 2, Loss: 2.810098648071289


Processing epoch 01:  25%|██▍       | 1200/4850 [05:43<17:48,  3.42it/s]

Epoch: 2, Loss: 2.9698128700256348


Processing epoch 01:  25%|██▍       | 1201/4850 [05:43<17:54,  3.39it/s]

Epoch: 2, Loss: 2.490208625793457


Processing epoch 01:  25%|██▍       | 1202/4850 [05:43<17:54,  3.40it/s]

Epoch: 2, Loss: 2.1982882022857666


Processing epoch 01:  25%|██▍       | 1203/4850 [05:43<17:40,  3.44it/s]

Epoch: 2, Loss: 3.1854705810546875


Processing epoch 01:  25%|██▍       | 1204/4850 [05:44<17:44,  3.43it/s]

Epoch: 2, Loss: 2.8677964210510254


Processing epoch 01:  25%|██▍       | 1205/4850 [05:44<17:39,  3.44it/s]

Epoch: 2, Loss: 2.844574451446533


Processing epoch 01:  25%|██▍       | 1206/4850 [05:44<17:46,  3.42it/s]

Epoch: 2, Loss: 2.3945164680480957


Processing epoch 01:  25%|██▍       | 1207/4850 [05:45<17:58,  3.38it/s]

Epoch: 2, Loss: 3.9030065536499023


Processing epoch 01:  25%|██▍       | 1208/4850 [05:45<18:10,  3.34it/s]

Epoch: 2, Loss: 2.587522029876709


Processing epoch 01:  25%|██▍       | 1209/4850 [05:45<18:08,  3.34it/s]

Epoch: 2, Loss: 2.780777931213379


Processing epoch 01:  25%|██▍       | 1210/4850 [05:46<17:59,  3.37it/s]

Epoch: 2, Loss: 2.7538790702819824


Processing epoch 01:  25%|██▍       | 1211/4850 [05:46<17:36,  3.44it/s]

Epoch: 2, Loss: 2.1139867305755615


Processing epoch 01:  25%|██▍       | 1212/4850 [05:46<17:28,  3.47it/s]

Epoch: 2, Loss: 2.4582207202911377


Processing epoch 01:  25%|██▌       | 1213/4850 [05:46<17:18,  3.50it/s]

Epoch: 2, Loss: 2.724668025970459


Processing epoch 01:  25%|██▌       | 1214/4850 [05:47<17:11,  3.53it/s]

Epoch: 2, Loss: 3.0705766677856445


Processing epoch 01:  25%|██▌       | 1215/4850 [05:47<17:05,  3.55it/s]

Epoch: 2, Loss: 3.773160457611084


Processing epoch 01:  25%|██▌       | 1216/4850 [05:47<17:00,  3.56it/s]

Epoch: 2, Loss: 3.4131221771240234


Processing epoch 01:  25%|██▌       | 1217/4850 [05:47<17:00,  3.56it/s]

Epoch: 2, Loss: 2.5824995040893555


Processing epoch 01:  25%|██▌       | 1218/4850 [05:48<17:00,  3.56it/s]

Epoch: 2, Loss: 3.856635570526123


Processing epoch 01:  25%|██▌       | 1219/4850 [05:48<16:54,  3.58it/s]

Epoch: 2, Loss: 3.952685594558716


Processing epoch 01:  25%|██▌       | 1220/4850 [05:48<16:49,  3.60it/s]

Epoch: 2, Loss: 2.908738613128662


Processing epoch 01:  25%|██▌       | 1221/4850 [05:49<16:51,  3.59it/s]

Epoch: 2, Loss: 2.8467044830322266


Processing epoch 01:  25%|██▌       | 1222/4850 [05:49<16:54,  3.57it/s]

Epoch: 2, Loss: 3.114542245864868


Processing epoch 01:  25%|██▌       | 1223/4850 [05:49<17:00,  3.55it/s]

Epoch: 2, Loss: 2.4383931159973145


Processing epoch 01:  25%|██▌       | 1224/4850 [05:49<16:56,  3.57it/s]

Epoch: 2, Loss: 3.6992006301879883


Processing epoch 01:  25%|██▌       | 1225/4850 [05:50<16:56,  3.57it/s]

Epoch: 2, Loss: 2.097598075866699


Processing epoch 01:  25%|██▌       | 1226/4850 [05:50<16:52,  3.58it/s]

Epoch: 2, Loss: 2.6559462547302246


Processing epoch 01:  25%|██▌       | 1227/4850 [05:50<16:55,  3.57it/s]

Epoch: 2, Loss: 2.6485049724578857


Processing epoch 01:  25%|██▌       | 1228/4850 [05:51<16:49,  3.59it/s]

Epoch: 2, Loss: 3.930068016052246


Processing epoch 01:  25%|██▌       | 1229/4850 [05:51<16:49,  3.59it/s]

Epoch: 2, Loss: 2.6299657821655273


Processing epoch 01:  25%|██▌       | 1230/4850 [05:51<16:51,  3.58it/s]

Epoch: 2, Loss: 3.2432494163513184


Processing epoch 01:  25%|██▌       | 1231/4850 [05:51<16:54,  3.57it/s]

Epoch: 2, Loss: 3.071657180786133


Processing epoch 01:  25%|██▌       | 1232/4850 [05:52<16:57,  3.56it/s]

Epoch: 2, Loss: 1.7616662979125977


Processing epoch 01:  25%|██▌       | 1233/4850 [05:52<17:01,  3.54it/s]

Epoch: 2, Loss: 2.4656033515930176


Processing epoch 01:  25%|██▌       | 1234/4850 [05:52<17:01,  3.54it/s]

Epoch: 2, Loss: 2.409851551055908


Processing epoch 01:  25%|██▌       | 1235/4850 [05:53<17:01,  3.54it/s]

Epoch: 2, Loss: 2.513716697692871


Processing epoch 01:  25%|██▌       | 1236/4850 [05:53<17:02,  3.54it/s]

Epoch: 2, Loss: 3.0925419330596924


Processing epoch 01:  26%|██▌       | 1237/4850 [05:53<17:00,  3.54it/s]

Epoch: 2, Loss: 3.5897722244262695


Processing epoch 01:  26%|██▌       | 1238/4850 [05:53<16:59,  3.54it/s]

Epoch: 2, Loss: 2.524388313293457


Processing epoch 01:  26%|██▌       | 1239/4850 [05:54<16:57,  3.55it/s]

Epoch: 2, Loss: 2.4423789978027344


Processing epoch 01:  26%|██▌       | 1240/4850 [05:54<17:03,  3.53it/s]

Epoch: 2, Loss: 2.5080978870391846


Processing epoch 01:  26%|██▌       | 1241/4850 [05:54<17:02,  3.53it/s]

Epoch: 2, Loss: 2.6490745544433594


Processing epoch 01:  26%|██▌       | 1242/4850 [05:55<17:00,  3.53it/s]

Epoch: 2, Loss: 2.9949660301208496


Processing epoch 01:  26%|██▌       | 1243/4850 [05:55<17:03,  3.53it/s]

Epoch: 2, Loss: 2.3722755908966064


Processing epoch 01:  26%|██▌       | 1244/4850 [05:55<17:02,  3.53it/s]

Epoch: 2, Loss: 2.6281988620758057


Processing epoch 01:  26%|██▌       | 1245/4850 [05:55<16:56,  3.55it/s]

Epoch: 2, Loss: 3.473567008972168


Processing epoch 01:  26%|██▌       | 1246/4850 [05:56<17:23,  3.45it/s]

Epoch: 2, Loss: 2.5843582153320312


Processing epoch 01:  26%|██▌       | 1247/4850 [05:56<17:26,  3.44it/s]

Epoch: 2, Loss: 2.694559097290039


Processing epoch 01:  26%|██▌       | 1248/4850 [05:56<17:17,  3.47it/s]

Epoch: 2, Loss: 2.0799307823181152


Processing epoch 01:  26%|██▌       | 1249/4850 [05:57<17:09,  3.50it/s]

Epoch: 2, Loss: 2.421597480773926


Processing epoch 01:  26%|██▌       | 1250/4850 [05:57<17:24,  3.45it/s]

Epoch: 2, Loss: 2.8337221145629883


Processing epoch 01:  26%|██▌       | 1251/4850 [05:57<17:36,  3.41it/s]

Epoch: 2, Loss: 2.6353516578674316


Processing epoch 01:  26%|██▌       | 1252/4850 [05:57<17:54,  3.35it/s]

Epoch: 2, Loss: 2.729018449783325


Processing epoch 01:  26%|██▌       | 1253/4850 [05:58<18:08,  3.30it/s]

Epoch: 2, Loss: 3.20640230178833


Processing epoch 01:  26%|██▌       | 1254/4850 [05:58<18:11,  3.29it/s]

Epoch: 2, Loss: 2.0803117752075195


Processing epoch 01:  26%|██▌       | 1255/4850 [05:58<18:01,  3.32it/s]

Epoch: 2, Loss: 2.529195547103882


Processing epoch 01:  26%|██▌       | 1256/4850 [05:59<18:07,  3.31it/s]

Epoch: 2, Loss: 2.3247437477111816


Processing epoch 01:  26%|██▌       | 1257/4850 [05:59<17:58,  3.33it/s]

Epoch: 2, Loss: 2.6854770183563232


Processing epoch 01:  26%|██▌       | 1258/4850 [05:59<18:04,  3.31it/s]

Epoch: 2, Loss: 2.306648015975952


Processing epoch 01:  26%|██▌       | 1259/4850 [06:00<18:02,  3.32it/s]

Epoch: 2, Loss: 2.6146817207336426


Processing epoch 01:  26%|██▌       | 1260/4850 [06:00<17:42,  3.38it/s]

Epoch: 2, Loss: 2.325777530670166


Processing epoch 01:  26%|██▌       | 1261/4850 [06:00<17:29,  3.42it/s]

Epoch: 2, Loss: 2.719604969024658


Processing epoch 01:  26%|██▌       | 1262/4850 [06:00<17:17,  3.46it/s]

Epoch: 2, Loss: 2.623772144317627


Processing epoch 01:  26%|██▌       | 1263/4850 [06:01<17:07,  3.49it/s]

Epoch: 2, Loss: 2.393230676651001


Processing epoch 01:  26%|██▌       | 1264/4850 [06:01<17:02,  3.51it/s]

Epoch: 2, Loss: 2.613448143005371


Processing epoch 01:  26%|██▌       | 1265/4850 [06:01<17:00,  3.51it/s]

Epoch: 2, Loss: 2.4721641540527344


Processing epoch 01:  26%|██▌       | 1266/4850 [06:02<16:56,  3.53it/s]

Epoch: 2, Loss: 2.8587722778320312


Processing epoch 01:  26%|██▌       | 1267/4850 [06:02<16:54,  3.53it/s]

Epoch: 2, Loss: 2.7105932235717773


Processing epoch 01:  26%|██▌       | 1268/4850 [06:02<16:49,  3.55it/s]

Epoch: 2, Loss: 2.217967987060547


Processing epoch 01:  26%|██▌       | 1269/4850 [06:02<16:59,  3.51it/s]

Epoch: 2, Loss: 2.7903809547424316


Processing epoch 01:  26%|██▌       | 1270/4850 [06:03<16:57,  3.52it/s]

Epoch: 2, Loss: 3.3495798110961914


Processing epoch 01:  26%|██▌       | 1271/4850 [06:03<16:54,  3.53it/s]

Epoch: 2, Loss: 2.9412729740142822


Processing epoch 01:  26%|██▌       | 1272/4850 [06:03<16:57,  3.52it/s]

Epoch: 2, Loss: 2.203903913497925


Processing epoch 01:  26%|██▌       | 1273/4850 [06:04<16:57,  3.51it/s]

Epoch: 2, Loss: 2.4523513317108154


Processing epoch 01:  26%|██▋       | 1274/4850 [06:04<16:50,  3.54it/s]

Epoch: 2, Loss: 2.866091728210449


Processing epoch 01:  26%|██▋       | 1275/4850 [06:04<16:48,  3.55it/s]

Epoch: 2, Loss: 2.8685812950134277


Processing epoch 01:  26%|██▋       | 1276/4850 [06:04<16:47,  3.55it/s]

Epoch: 2, Loss: 2.053558826446533


Processing epoch 01:  26%|██▋       | 1277/4850 [06:05<16:46,  3.55it/s]

Epoch: 2, Loss: 2.8399124145507812


Processing epoch 01:  26%|██▋       | 1278/4850 [06:05<16:41,  3.57it/s]

Epoch: 2, Loss: 3.644400119781494


Processing epoch 01:  26%|██▋       | 1279/4850 [06:05<16:43,  3.56it/s]

Epoch: 2, Loss: 2.400886058807373


Processing epoch 01:  26%|██▋       | 1280/4850 [06:06<16:58,  3.51it/s]

Epoch: 2, Loss: 2.162391424179077


Processing epoch 01:  26%|██▋       | 1281/4850 [06:06<16:51,  3.53it/s]

Epoch: 2, Loss: 2.324265480041504


Processing epoch 01:  26%|██▋       | 1282/4850 [06:06<16:47,  3.54it/s]

Epoch: 2, Loss: 2.1863150596618652


Processing epoch 01:  26%|██▋       | 1283/4850 [06:06<16:45,  3.55it/s]

Epoch: 2, Loss: 2.767392635345459


Processing epoch 01:  26%|██▋       | 1284/4850 [06:07<16:44,  3.55it/s]

Epoch: 2, Loss: 3.576923370361328


Processing epoch 01:  26%|██▋       | 1285/4850 [06:07<16:52,  3.52it/s]

Epoch: 2, Loss: 2.2446534633636475


Processing epoch 01:  27%|██▋       | 1286/4850 [06:07<16:54,  3.51it/s]

Epoch: 2, Loss: 2.1969223022460938


Processing epoch 01:  27%|██▋       | 1287/4850 [06:07<16:48,  3.53it/s]

Epoch: 2, Loss: 2.577941656112671


Processing epoch 01:  27%|██▋       | 1288/4850 [06:08<16:46,  3.54it/s]

Epoch: 2, Loss: 2.346658229827881


Processing epoch 01:  27%|██▋       | 1289/4850 [06:08<16:44,  3.54it/s]

Epoch: 2, Loss: 2.442873001098633


Processing epoch 01:  27%|██▋       | 1290/4850 [06:08<16:44,  3.55it/s]

Epoch: 2, Loss: 2.6155805587768555


Processing epoch 01:  27%|██▋       | 1291/4850 [06:09<16:54,  3.51it/s]

Epoch: 2, Loss: 2.7758970260620117


Processing epoch 01:  27%|██▋       | 1292/4850 [06:09<16:51,  3.52it/s]

Epoch: 2, Loss: 3.748554229736328


Processing epoch 01:  27%|██▋       | 1293/4850 [06:09<16:49,  3.52it/s]

Epoch: 2, Loss: 2.6804585456848145


Processing epoch 01:  27%|██▋       | 1294/4850 [06:09<16:45,  3.54it/s]

Epoch: 2, Loss: 2.653886556625366


Processing epoch 01:  27%|██▋       | 1295/4850 [06:10<16:54,  3.50it/s]

Epoch: 2, Loss: 2.6374921798706055


Processing epoch 01:  27%|██▋       | 1296/4850 [06:10<16:56,  3.50it/s]

Epoch: 2, Loss: 3.130953311920166


Processing epoch 01:  27%|██▋       | 1297/4850 [06:10<17:14,  3.44it/s]

Epoch: 2, Loss: 2.151649236679077


Processing epoch 01:  27%|██▋       | 1298/4850 [06:11<17:17,  3.42it/s]

Epoch: 2, Loss: 2.6142921447753906


Processing epoch 01:  27%|██▋       | 1299/4850 [06:11<17:18,  3.42it/s]

Epoch: 2, Loss: 2.9006893634796143


Processing epoch 01:  27%|██▋       | 1300/4850 [06:11<17:08,  3.45it/s]

Epoch: 2, Loss: 3.9926724433898926


Processing epoch 01:  27%|██▋       | 1301/4850 [06:11<16:59,  3.48it/s]

Epoch: 2, Loss: 2.7008116245269775


Processing epoch 01:  27%|██▋       | 1302/4850 [06:12<16:58,  3.49it/s]

Epoch: 2, Loss: 2.45763897895813


Processing epoch 01:  27%|██▋       | 1303/4850 [06:12<17:16,  3.42it/s]

Epoch: 2, Loss: 2.577821731567383


Processing epoch 01:  27%|██▋       | 1304/4850 [06:12<17:03,  3.46it/s]

Epoch: 2, Loss: 2.684502601623535


Processing epoch 01:  27%|██▋       | 1305/4850 [06:13<17:20,  3.41it/s]

Epoch: 2, Loss: 2.8427505493164062


Processing epoch 01:  27%|██▋       | 1306/4850 [06:13<17:28,  3.38it/s]

Epoch: 2, Loss: 2.206256866455078


Processing epoch 01:  27%|██▋       | 1307/4850 [06:13<17:11,  3.43it/s]

Epoch: 2, Loss: 2.895075798034668


Processing epoch 01:  27%|██▋       | 1308/4850 [06:14<17:21,  3.40it/s]

Epoch: 2, Loss: 2.678621768951416


Processing epoch 01:  27%|██▋       | 1309/4850 [06:14<17:31,  3.37it/s]

Epoch: 2, Loss: 2.2801246643066406


Processing epoch 01:  27%|██▋       | 1310/4850 [06:14<17:10,  3.44it/s]

Epoch: 2, Loss: 3.2800159454345703


Processing epoch 01:  27%|██▋       | 1311/4850 [06:14<17:00,  3.47it/s]

Epoch: 2, Loss: 2.6323060989379883


Processing epoch 01:  27%|██▋       | 1312/4850 [06:15<16:53,  3.49it/s]

Epoch: 2, Loss: 2.9468119144439697


Processing epoch 01:  27%|██▋       | 1313/4850 [06:15<17:02,  3.46it/s]

Epoch: 2, Loss: 2.1563215255737305


Processing epoch 01:  27%|██▋       | 1314/4850 [06:15<16:51,  3.50it/s]

Epoch: 2, Loss: 2.58250093460083


Processing epoch 01:  27%|██▋       | 1315/4850 [06:16<16:46,  3.51it/s]

Epoch: 2, Loss: 2.240516185760498


Processing epoch 01:  27%|██▋       | 1316/4850 [06:16<16:43,  3.52it/s]

Epoch: 2, Loss: 3.0645675659179688


Processing epoch 01:  27%|██▋       | 1317/4850 [06:16<16:39,  3.53it/s]

Epoch: 2, Loss: 2.3003463745117188


Processing epoch 01:  27%|██▋       | 1318/4850 [06:16<16:44,  3.52it/s]

Epoch: 2, Loss: 2.5131659507751465


Processing epoch 01:  27%|██▋       | 1319/4850 [06:17<16:47,  3.51it/s]

Epoch: 2, Loss: 2.6103715896606445


Processing epoch 01:  27%|██▋       | 1320/4850 [06:17<16:45,  3.51it/s]

Epoch: 2, Loss: 2.723876476287842


Processing epoch 01:  27%|██▋       | 1321/4850 [06:17<16:41,  3.52it/s]

Epoch: 2, Loss: 3.445953607559204


Processing epoch 01:  27%|██▋       | 1322/4850 [06:18<16:39,  3.53it/s]

Epoch: 2, Loss: 2.1030752658843994


Processing epoch 01:  27%|██▋       | 1323/4850 [06:18<16:37,  3.53it/s]

Epoch: 2, Loss: 3.143605947494507


Processing epoch 01:  27%|██▋       | 1324/4850 [06:18<16:44,  3.51it/s]

Epoch: 2, Loss: 2.6605310440063477


Processing epoch 01:  27%|██▋       | 1325/4850 [06:18<16:40,  3.52it/s]

Epoch: 2, Loss: 2.2530579566955566


Processing epoch 01:  27%|██▋       | 1326/4850 [06:19<16:38,  3.53it/s]

Epoch: 2, Loss: 2.593949794769287


Processing epoch 01:  27%|██▋       | 1327/4850 [06:19<16:36,  3.53it/s]

Epoch: 2, Loss: 2.3347482681274414


Processing epoch 01:  27%|██▋       | 1328/4850 [06:19<16:37,  3.53it/s]

Epoch: 2, Loss: 2.6170637607574463


Processing epoch 01:  27%|██▋       | 1329/4850 [06:20<16:30,  3.55it/s]

Epoch: 2, Loss: 2.522827386856079


Processing epoch 01:  27%|██▋       | 1330/4850 [06:20<16:30,  3.55it/s]

Epoch: 2, Loss: 2.3017663955688477


Processing epoch 01:  27%|██▋       | 1331/4850 [06:20<16:28,  3.56it/s]

Epoch: 2, Loss: 3.325678825378418


Processing epoch 01:  27%|██▋       | 1332/4850 [06:20<16:27,  3.56it/s]

Epoch: 2, Loss: 2.7447049617767334


Processing epoch 01:  27%|██▋       | 1333/4850 [06:21<16:21,  3.58it/s]

Epoch: 2, Loss: 4.9558305740356445


Processing epoch 01:  28%|██▊       | 1334/4850 [06:21<16:26,  3.57it/s]

Epoch: 2, Loss: 2.607060194015503


Processing epoch 01:  28%|██▊       | 1335/4850 [06:21<16:42,  3.51it/s]

Epoch: 2, Loss: 2.323009490966797


Processing epoch 01:  28%|██▊       | 1336/4850 [06:21<16:36,  3.53it/s]

Epoch: 2, Loss: 2.3813438415527344


Processing epoch 01:  28%|██▊       | 1337/4850 [06:22<16:35,  3.53it/s]

Epoch: 2, Loss: 2.6426591873168945


Processing epoch 01:  28%|██▊       | 1338/4850 [06:22<16:32,  3.54it/s]

Epoch: 2, Loss: 2.521068572998047


Processing epoch 01:  28%|██▊       | 1339/4850 [06:22<16:32,  3.54it/s]

Epoch: 2, Loss: 2.5689845085144043


Processing epoch 01:  28%|██▊       | 1340/4850 [06:23<16:32,  3.54it/s]

Epoch: 2, Loss: 2.024965763092041


Processing epoch 01:  28%|██▊       | 1341/4850 [06:23<16:29,  3.55it/s]

Epoch: 2, Loss: 2.9541845321655273


Processing epoch 01:  28%|██▊       | 1342/4850 [06:23<16:32,  3.53it/s]

Epoch: 2, Loss: 2.619198799133301


Processing epoch 01:  28%|██▊       | 1343/4850 [06:23<16:30,  3.54it/s]

Epoch: 2, Loss: 3.166594982147217


Processing epoch 01:  28%|██▊       | 1344/4850 [06:24<16:32,  3.53it/s]

Epoch: 2, Loss: 3.054208278656006


Processing epoch 01:  28%|██▊       | 1345/4850 [06:24<16:39,  3.51it/s]

Epoch: 2, Loss: 2.884620428085327


Processing epoch 01:  28%|██▊       | 1346/4850 [06:24<16:55,  3.45it/s]

Epoch: 2, Loss: 2.6102068424224854


Processing epoch 01:  28%|██▊       | 1347/4850 [06:25<16:46,  3.48it/s]

Epoch: 2, Loss: 2.619781970977783


Processing epoch 01:  28%|██▊       | 1348/4850 [06:25<16:38,  3.51it/s]

Epoch: 2, Loss: 2.5811238288879395


Processing epoch 01:  28%|██▊       | 1349/4850 [06:25<16:47,  3.47it/s]

Epoch: 2, Loss: 2.401801586151123


Processing epoch 01:  28%|██▊       | 1350/4850 [06:25<16:45,  3.48it/s]

Epoch: 2, Loss: 2.428128719329834


Processing epoch 01:  28%|██▊       | 1351/4850 [06:26<17:01,  3.42it/s]

Epoch: 2, Loss: 2.8718833923339844


Processing epoch 01:  28%|██▊       | 1352/4850 [06:26<17:12,  3.39it/s]

Epoch: 2, Loss: 1.9481056928634644


Processing epoch 01:  28%|██▊       | 1353/4850 [06:26<17:07,  3.40it/s]

Epoch: 2, Loss: 2.3778128623962402


Processing epoch 01:  28%|██▊       | 1354/4850 [06:27<17:19,  3.36it/s]

Epoch: 2, Loss: 2.612522840499878


Processing epoch 01:  28%|██▊       | 1355/4850 [06:27<17:24,  3.35it/s]

Epoch: 2, Loss: 2.508942127227783


Processing epoch 01:  28%|██▊       | 1356/4850 [06:27<17:28,  3.33it/s]

Epoch: 2, Loss: 3.0190110206604004


Processing epoch 01:  28%|██▊       | 1357/4850 [06:28<17:42,  3.29it/s]

Epoch: 2, Loss: 2.276167392730713


Processing epoch 01:  28%|██▊       | 1358/4850 [06:28<17:42,  3.29it/s]

Epoch: 2, Loss: 2.1601014137268066


Processing epoch 01:  28%|██▊       | 1359/4850 [06:28<17:21,  3.35it/s]

Epoch: 2, Loss: 1.958432674407959


Processing epoch 01:  28%|██▊       | 1360/4850 [06:28<17:03,  3.41it/s]

Epoch: 2, Loss: 2.8422231674194336


Processing epoch 01:  28%|██▊       | 1361/4850 [06:29<16:51,  3.45it/s]

Epoch: 2, Loss: 2.3427767753601074


Processing epoch 01:  28%|██▊       | 1362/4850 [06:29<16:41,  3.48it/s]

Epoch: 2, Loss: 2.70381498336792


Processing epoch 01:  28%|██▊       | 1363/4850 [06:29<16:36,  3.50it/s]

Epoch: 2, Loss: 2.788989305496216


Processing epoch 01:  28%|██▊       | 1364/4850 [06:30<16:42,  3.48it/s]

Epoch: 2, Loss: 2.5810229778289795


Processing epoch 01:  28%|██▊       | 1365/4850 [06:30<16:41,  3.48it/s]

Epoch: 2, Loss: 2.3838400840759277


Processing epoch 01:  28%|██▊       | 1366/4850 [06:30<16:36,  3.49it/s]

Epoch: 2, Loss: 3.1560118198394775


Processing epoch 01:  28%|██▊       | 1367/4850 [06:30<16:35,  3.50it/s]

Epoch: 2, Loss: 2.2301974296569824


Processing epoch 01:  28%|██▊       | 1368/4850 [06:31<16:32,  3.51it/s]

Epoch: 2, Loss: 4.620532512664795


Processing epoch 01:  28%|██▊       | 1369/4850 [06:31<16:31,  3.51it/s]

Epoch: 2, Loss: 3.131237506866455


Processing epoch 01:  28%|██▊       | 1370/4850 [06:31<16:29,  3.52it/s]

Epoch: 2, Loss: 3.1688027381896973


Processing epoch 01:  28%|██▊       | 1371/4850 [06:32<16:29,  3.52it/s]

Epoch: 2, Loss: 2.445523262023926


Processing epoch 01:  28%|██▊       | 1372/4850 [06:32<16:28,  3.52it/s]

Epoch: 2, Loss: 2.8395919799804688


Processing epoch 01:  28%|██▊       | 1373/4850 [06:32<16:27,  3.52it/s]

Epoch: 2, Loss: 2.192680597305298


Processing epoch 01:  28%|██▊       | 1374/4850 [06:32<16:25,  3.53it/s]

Epoch: 2, Loss: 2.3603994846343994


Processing epoch 01:  28%|██▊       | 1375/4850 [06:33<16:36,  3.49it/s]

Epoch: 2, Loss: 2.7894859313964844


Processing epoch 01:  28%|██▊       | 1376/4850 [06:33<16:27,  3.52it/s]

Epoch: 2, Loss: 2.601672649383545


Processing epoch 01:  28%|██▊       | 1377/4850 [06:33<16:22,  3.53it/s]

Epoch: 2, Loss: 2.555832624435425


Processing epoch 01:  28%|██▊       | 1378/4850 [06:34<16:20,  3.54it/s]

Epoch: 2, Loss: 2.6567065715789795


Processing epoch 01:  28%|██▊       | 1379/4850 [06:34<16:19,  3.54it/s]

Epoch: 2, Loss: 3.1077113151550293


Processing epoch 01:  28%|██▊       | 1380/4850 [06:34<16:21,  3.54it/s]

Epoch: 2, Loss: 2.7088205814361572


Processing epoch 01:  28%|██▊       | 1381/4850 [06:34<16:16,  3.55it/s]

Epoch: 2, Loss: 2.841940402984619


Processing epoch 01:  28%|██▊       | 1382/4850 [06:35<16:26,  3.52it/s]

Epoch: 2, Loss: 2.20163631439209


Processing epoch 01:  29%|██▊       | 1383/4850 [06:35<16:21,  3.53it/s]

Epoch: 2, Loss: 3.1392083168029785


Processing epoch 01:  29%|██▊       | 1384/4850 [06:35<16:22,  3.53it/s]

Epoch: 2, Loss: 2.2988953590393066


Processing epoch 01:  29%|██▊       | 1385/4850 [06:36<16:23,  3.52it/s]

Epoch: 2, Loss: 2.776094913482666


Processing epoch 01:  29%|██▊       | 1386/4850 [06:36<16:34,  3.48it/s]

Epoch: 2, Loss: 2.896280288696289


Processing epoch 01:  29%|██▊       | 1387/4850 [06:36<16:23,  3.52it/s]

Epoch: 2, Loss: 2.637937068939209


Processing epoch 01:  29%|██▊       | 1388/4850 [06:36<16:21,  3.53it/s]

Epoch: 2, Loss: 2.466557025909424


Processing epoch 01:  29%|██▊       | 1389/4850 [06:37<16:17,  3.54it/s]

Epoch: 2, Loss: 2.2343153953552246


Processing epoch 01:  29%|██▊       | 1390/4850 [06:37<16:21,  3.52it/s]

Epoch: 2, Loss: 2.4900150299072266


Processing epoch 01:  29%|██▊       | 1391/4850 [06:37<16:20,  3.53it/s]

Epoch: 2, Loss: 2.508488178253174


Processing epoch 01:  29%|██▊       | 1392/4850 [06:38<16:13,  3.55it/s]

Epoch: 2, Loss: 3.5835323333740234


Processing epoch 01:  29%|██▊       | 1393/4850 [06:38<16:14,  3.55it/s]

Epoch: 2, Loss: 2.8152942657470703


Processing epoch 01:  29%|██▊       | 1394/4850 [06:38<16:34,  3.47it/s]

Epoch: 2, Loss: 2.814067840576172


Processing epoch 01:  29%|██▉       | 1395/4850 [06:38<16:48,  3.43it/s]

Epoch: 2, Loss: 3.017030715942383


Processing epoch 01:  29%|██▉       | 1396/4850 [06:39<17:04,  3.37it/s]

Epoch: 2, Loss: 2.4773097038269043


Processing epoch 01:  29%|██▉       | 1397/4850 [06:39<17:03,  3.37it/s]

Epoch: 2, Loss: 2.4784340858459473


Processing epoch 01:  29%|██▉       | 1398/4850 [06:39<16:54,  3.40it/s]

Epoch: 2, Loss: 2.4355504512786865


Processing epoch 01:  29%|██▉       | 1399/4850 [06:40<16:36,  3.46it/s]

Epoch: 2, Loss: 2.717334747314453


Processing epoch 01:  29%|██▉       | 1400/4850 [06:40<16:30,  3.48it/s]

Epoch: 2, Loss: 2.25495982170105


Processing epoch 01:  29%|██▉       | 1401/4850 [06:40<16:45,  3.43it/s]

Epoch: 2, Loss: 2.349144458770752


Processing epoch 01:  29%|██▉       | 1402/4850 [06:40<16:55,  3.39it/s]

Epoch: 2, Loss: 2.6966500282287598


Processing epoch 01:  29%|██▉       | 1403/4850 [06:41<16:53,  3.40it/s]

Epoch: 2, Loss: 2.3668861389160156


Processing epoch 01:  29%|██▉       | 1404/4850 [06:41<16:51,  3.41it/s]

Epoch: 2, Loss: 3.005802631378174


Processing epoch 01:  29%|██▉       | 1405/4850 [06:41<16:59,  3.38it/s]

Epoch: 2, Loss: 2.7435240745544434


Processing epoch 01:  29%|██▉       | 1406/4850 [06:42<17:02,  3.37it/s]

Epoch: 2, Loss: 2.122547149658203


Processing epoch 01:  29%|██▉       | 1407/4850 [06:42<17:23,  3.30it/s]

Epoch: 2, Loss: 2.1067733764648438


Processing epoch 01:  29%|██▉       | 1408/4850 [06:42<17:10,  3.34it/s]

Epoch: 2, Loss: 2.517003059387207


Processing epoch 01:  29%|██▉       | 1409/4850 [06:43<16:56,  3.38it/s]

Epoch: 2, Loss: 2.3167543411254883


Processing epoch 01:  29%|██▉       | 1410/4850 [06:43<16:44,  3.42it/s]

Epoch: 2, Loss: 2.751960039138794


Processing epoch 01:  29%|██▉       | 1411/4850 [06:43<16:34,  3.46it/s]

Epoch: 2, Loss: 2.5851292610168457


Processing epoch 01:  29%|██▉       | 1412/4850 [06:43<16:27,  3.48it/s]

Epoch: 2, Loss: 3.2221789360046387


Processing epoch 01:  29%|██▉       | 1413/4850 [06:44<16:24,  3.49it/s]

Epoch: 2, Loss: 2.542712688446045


Processing epoch 01:  29%|██▉       | 1414/4850 [06:44<16:18,  3.51it/s]

Epoch: 2, Loss: 2.5599827766418457


Processing epoch 01:  29%|██▉       | 1415/4850 [06:44<16:22,  3.50it/s]

Epoch: 2, Loss: 2.6120662689208984


Processing epoch 01:  29%|██▉       | 1416/4850 [06:45<16:26,  3.48it/s]

Epoch: 2, Loss: 2.763245105743408


Processing epoch 01:  29%|██▉       | 1417/4850 [06:45<16:25,  3.49it/s]

Epoch: 2, Loss: 3.1030068397521973


Processing epoch 01:  29%|██▉       | 1418/4850 [06:45<16:20,  3.50it/s]

Epoch: 2, Loss: 2.1056599617004395


Processing epoch 01:  29%|██▉       | 1419/4850 [06:45<16:21,  3.50it/s]

Epoch: 2, Loss: 2.500561237335205


Processing epoch 01:  29%|██▉       | 1420/4850 [06:46<16:15,  3.52it/s]

Epoch: 2, Loss: 2.814728260040283


Processing epoch 01:  29%|██▉       | 1421/4850 [06:46<16:14,  3.52it/s]

Epoch: 2, Loss: 2.3602538108825684


Processing epoch 01:  29%|██▉       | 1422/4850 [06:46<16:05,  3.55it/s]

Epoch: 2, Loss: 3.6082892417907715


Processing epoch 01:  29%|██▉       | 1423/4850 [06:47<16:01,  3.57it/s]

Epoch: 2, Loss: 2.948180675506592


Processing epoch 01:  29%|██▉       | 1424/4850 [06:47<16:00,  3.57it/s]

Epoch: 2, Loss: 3.279921531677246


Processing epoch 01:  29%|██▉       | 1425/4850 [06:47<16:03,  3.55it/s]

Epoch: 2, Loss: 2.735079765319824


Processing epoch 01:  29%|██▉       | 1426/4850 [06:47<16:07,  3.54it/s]

Epoch: 2, Loss: 2.796875


Processing epoch 01:  29%|██▉       | 1427/4850 [06:48<16:05,  3.54it/s]

Epoch: 2, Loss: 2.6490893363952637


Processing epoch 01:  29%|██▉       | 1428/4850 [06:48<16:08,  3.53it/s]

Epoch: 2, Loss: 2.408097982406616


Processing epoch 01:  29%|██▉       | 1429/4850 [06:48<16:06,  3.54it/s]

Epoch: 2, Loss: 2.493420124053955


Processing epoch 01:  29%|██▉       | 1430/4850 [06:49<16:07,  3.54it/s]

Epoch: 2, Loss: 2.1600215435028076


Processing epoch 01:  30%|██▉       | 1431/4850 [06:49<16:08,  3.53it/s]

Epoch: 2, Loss: 2.648059844970703


Processing epoch 01:  30%|██▉       | 1432/4850 [06:49<16:06,  3.54it/s]

Epoch: 2, Loss: 2.488009214401245


Processing epoch 01:  30%|██▉       | 1433/4850 [06:49<16:05,  3.54it/s]

Epoch: 2, Loss: 2.592219352722168


Processing epoch 01:  30%|██▉       | 1434/4850 [06:50<16:03,  3.55it/s]

Epoch: 2, Loss: 3.4441983699798584


Processing epoch 01:  30%|██▉       | 1435/4850 [06:50<16:02,  3.55it/s]

Epoch: 2, Loss: 2.6457626819610596


Processing epoch 01:  30%|██▉       | 1436/4850 [06:50<16:04,  3.54it/s]

Epoch: 2, Loss: 3.0940756797790527


Processing epoch 01:  30%|██▉       | 1437/4850 [06:51<16:08,  3.53it/s]

Epoch: 2, Loss: 2.3779735565185547


Processing epoch 01:  30%|██▉       | 1438/4850 [06:51<16:06,  3.53it/s]

Epoch: 2, Loss: 3.1534619331359863


Processing epoch 01:  30%|██▉       | 1439/4850 [06:51<16:05,  3.53it/s]

Epoch: 2, Loss: 2.8646626472473145


Processing epoch 01:  30%|██▉       | 1440/4850 [06:51<16:05,  3.53it/s]

Epoch: 2, Loss: 2.535024642944336


Processing epoch 01:  30%|██▉       | 1441/4850 [06:52<16:01,  3.55it/s]

Epoch: 2, Loss: 2.874760866165161


Processing epoch 01:  30%|██▉       | 1442/4850 [06:52<16:00,  3.55it/s]

Epoch: 2, Loss: 3.932504892349243


Processing epoch 01:  30%|██▉       | 1443/4850 [06:52<16:00,  3.55it/s]

Epoch: 2, Loss: 2.9665658473968506


Processing epoch 01:  30%|██▉       | 1444/4850 [06:53<16:22,  3.47it/s]

Epoch: 2, Loss: 2.7059998512268066


Processing epoch 01:  30%|██▉       | 1445/4850 [06:53<16:43,  3.39it/s]

Epoch: 2, Loss: 2.5710668563842773


Processing epoch 01:  30%|██▉       | 1446/4850 [06:53<16:29,  3.44it/s]

Epoch: 2, Loss: 2.7546169757843018


Processing epoch 01:  30%|██▉       | 1447/4850 [06:53<16:20,  3.47it/s]

Epoch: 2, Loss: 2.468101739883423


Processing epoch 01:  30%|██▉       | 1448/4850 [06:54<16:33,  3.42it/s]

Epoch: 2, Loss: 2.289799690246582


Processing epoch 01:  30%|██▉       | 1449/4850 [06:54<16:39,  3.40it/s]

Epoch: 2, Loss: 2.7225635051727295


Processing epoch 01:  30%|██▉       | 1450/4850 [06:54<16:25,  3.45it/s]

Epoch: 2, Loss: 3.1351256370544434


Processing epoch 01:  30%|██▉       | 1451/4850 [06:55<16:29,  3.43it/s]

Epoch: 2, Loss: 2.4477458000183105


Processing epoch 01:  30%|██▉       | 1452/4850 [06:55<16:37,  3.41it/s]

Epoch: 2, Loss: 3.3051066398620605


Processing epoch 01:  30%|██▉       | 1453/4850 [06:55<16:49,  3.36it/s]

Epoch: 2, Loss: 2.2136213779449463


Processing epoch 01:  30%|██▉       | 1454/4850 [06:55<16:54,  3.35it/s]

Epoch: 2, Loss: 3.0234084129333496


Processing epoch 01:  30%|███       | 1455/4850 [06:56<16:41,  3.39it/s]

Epoch: 2, Loss: 2.7258129119873047


Processing epoch 01:  30%|███       | 1456/4850 [06:56<16:27,  3.44it/s]

Epoch: 2, Loss: 3.426403522491455


Processing epoch 01:  30%|███       | 1457/4850 [06:56<16:32,  3.42it/s]

Epoch: 2, Loss: 2.7253024578094482


Processing epoch 01:  30%|███       | 1458/4850 [06:57<16:57,  3.33it/s]

Epoch: 2, Loss: 2.9415364265441895


Processing epoch 01:  30%|███       | 1459/4850 [06:57<16:41,  3.39it/s]

Epoch: 2, Loss: 2.3460066318511963


Processing epoch 01:  30%|███       | 1460/4850 [06:57<16:33,  3.41it/s]

Epoch: 2, Loss: 2.3950071334838867


Processing epoch 01:  30%|███       | 1461/4850 [06:57<16:23,  3.44it/s]

Epoch: 2, Loss: 2.535763740539551


Processing epoch 01:  30%|███       | 1462/4850 [06:58<16:24,  3.44it/s]

Epoch: 2, Loss: 3.119831085205078


Processing epoch 01:  30%|███       | 1463/4850 [06:58<16:15,  3.47it/s]

Epoch: 2, Loss: 3.214305877685547


Processing epoch 01:  30%|███       | 1464/4850 [06:58<16:04,  3.51it/s]

Epoch: 2, Loss: 3.0227646827697754


Processing epoch 01:  30%|███       | 1465/4850 [06:59<16:00,  3.52it/s]

Epoch: 2, Loss: 2.5473082065582275


Processing epoch 01:  30%|███       | 1466/4850 [06:59<16:01,  3.52it/s]

Epoch: 2, Loss: 3.210606575012207


Processing epoch 01:  30%|███       | 1467/4850 [06:59<16:01,  3.52it/s]

Epoch: 2, Loss: 2.6600112915039062


Processing epoch 01:  30%|███       | 1468/4850 [06:59<15:58,  3.53it/s]

Epoch: 2, Loss: 2.7174530029296875


Processing epoch 01:  30%|███       | 1469/4850 [07:00<15:56,  3.53it/s]

Epoch: 2, Loss: 3.070612907409668


Processing epoch 01:  30%|███       | 1470/4850 [07:00<15:56,  3.53it/s]

Epoch: 2, Loss: 3.494631767272949


Processing epoch 01:  30%|███       | 1471/4850 [07:00<15:48,  3.56it/s]

Epoch: 2, Loss: 3.058382987976074


Processing epoch 01:  30%|███       | 1472/4850 [07:01<15:48,  3.56it/s]

Epoch: 2, Loss: 2.5109410285949707


Processing epoch 01:  30%|███       | 1473/4850 [07:01<15:58,  3.52it/s]

Epoch: 2, Loss: 2.5453739166259766


Processing epoch 01:  30%|███       | 1474/4850 [07:01<16:00,  3.52it/s]

Epoch: 2, Loss: 2.365586280822754


Processing epoch 01:  30%|███       | 1475/4850 [07:01<15:56,  3.53it/s]

Epoch: 2, Loss: 3.468456268310547


Processing epoch 01:  30%|███       | 1476/4850 [07:02<15:48,  3.56it/s]

Epoch: 2, Loss: 2.798933506011963


Processing epoch 01:  30%|███       | 1477/4850 [07:02<15:49,  3.55it/s]

Epoch: 2, Loss: 2.402266502380371


Processing epoch 01:  30%|███       | 1478/4850 [07:02<15:48,  3.55it/s]

Epoch: 2, Loss: 2.5268468856811523


Processing epoch 01:  30%|███       | 1479/4850 [07:03<15:49,  3.55it/s]

Epoch: 2, Loss: 2.106703758239746


Processing epoch 01:  31%|███       | 1480/4850 [07:03<15:51,  3.54it/s]

Epoch: 2, Loss: 2.709169387817383


Processing epoch 01:  31%|███       | 1481/4850 [07:03<15:53,  3.53it/s]

Epoch: 2, Loss: 2.3030824661254883


Processing epoch 01:  31%|███       | 1482/4850 [07:03<15:47,  3.55it/s]

Epoch: 2, Loss: 2.5348973274230957


Processing epoch 01:  31%|███       | 1483/4850 [07:04<15:45,  3.56it/s]

Epoch: 2, Loss: 3.3808629512786865


Processing epoch 01:  31%|███       | 1484/4850 [07:04<15:48,  3.55it/s]

Epoch: 2, Loss: 2.678084373474121


Processing epoch 01:  31%|███       | 1485/4850 [07:04<15:44,  3.56it/s]

Epoch: 2, Loss: 3.300760269165039


Processing epoch 01:  31%|███       | 1486/4850 [07:05<15:44,  3.56it/s]

Epoch: 2, Loss: 2.7824480533599854


Processing epoch 01:  31%|███       | 1487/4850 [07:05<15:44,  3.56it/s]

Epoch: 2, Loss: 3.2611031532287598


Processing epoch 01:  31%|███       | 1488/4850 [07:05<15:45,  3.55it/s]

Epoch: 2, Loss: 2.7703359127044678


Processing epoch 01:  31%|███       | 1489/4850 [07:05<15:47,  3.55it/s]

Epoch: 2, Loss: 2.6388912200927734


Processing epoch 01:  31%|███       | 1490/4850 [07:06<15:44,  3.56it/s]

Epoch: 2, Loss: 2.6493563652038574


Processing epoch 01:  31%|███       | 1491/4850 [07:06<15:42,  3.56it/s]

Epoch: 2, Loss: 2.715719699859619


Processing epoch 01:  31%|███       | 1492/4850 [07:06<15:42,  3.56it/s]

Epoch: 2, Loss: 2.0930614471435547


Processing epoch 01:  31%|███       | 1493/4850 [07:07<15:46,  3.55it/s]

Epoch: 2, Loss: 2.503488540649414


Processing epoch 01:  31%|███       | 1494/4850 [07:07<15:49,  3.53it/s]

Epoch: 2, Loss: 2.9844865798950195


Processing epoch 01:  31%|███       | 1495/4850 [07:07<16:03,  3.48it/s]

Epoch: 2, Loss: 3.2442312240600586


Processing epoch 01:  31%|███       | 1496/4850 [07:07<16:10,  3.46it/s]

Epoch: 2, Loss: 2.3936991691589355


Processing epoch 01:  31%|███       | 1497/4850 [07:08<16:11,  3.45it/s]

Epoch: 2, Loss: 2.391082525253296


Processing epoch 01:  31%|███       | 1498/4850 [07:08<16:03,  3.48it/s]

Epoch: 2, Loss: 2.5395426750183105


Processing epoch 01:  31%|███       | 1499/4850 [07:08<15:59,  3.49it/s]

Epoch: 2, Loss: 2.392733335494995


Processing epoch 01:  31%|███       | 1500/4850 [07:09<15:57,  3.50it/s]

Epoch: 2, Loss: 2.931766986846924


Processing epoch 01:  31%|███       | 1501/4850 [07:09<15:57,  3.50it/s]

Epoch: 2, Loss: 2.341805934906006


Processing epoch 01:  31%|███       | 1502/4850 [07:09<15:56,  3.50it/s]

Epoch: 2, Loss: 2.7525086402893066


Processing epoch 01:  31%|███       | 1503/4850 [07:09<16:03,  3.47it/s]

Epoch: 2, Loss: 2.2329623699188232


Processing epoch 01:  31%|███       | 1504/4850 [07:10<16:14,  3.43it/s]

Epoch: 2, Loss: 3.023481845855713


Processing epoch 01:  31%|███       | 1505/4850 [07:10<16:30,  3.38it/s]

Epoch: 2, Loss: 2.443650245666504


Processing epoch 01:  31%|███       | 1506/4850 [07:10<16:31,  3.37it/s]

Epoch: 2, Loss: 2.948939561843872


Processing epoch 01:  31%|███       | 1507/4850 [07:11<16:25,  3.39it/s]

Epoch: 2, Loss: 2.8947343826293945


Processing epoch 01:  31%|███       | 1508/4850 [07:11<16:40,  3.34it/s]

Epoch: 2, Loss: 2.909627914428711


Processing epoch 01:  31%|███       | 1509/4850 [07:11<16:26,  3.39it/s]

Epoch: 2, Loss: 2.823129653930664


Processing epoch 01:  31%|███       | 1510/4850 [07:11<16:11,  3.44it/s]

Epoch: 2, Loss: 2.347771644592285


Processing epoch 01:  31%|███       | 1511/4850 [07:12<16:01,  3.47it/s]

Epoch: 2, Loss: 2.2412922382354736


Processing epoch 01:  31%|███       | 1512/4850 [07:12<15:54,  3.50it/s]

Epoch: 2, Loss: 2.1492114067077637


Processing epoch 01:  31%|███       | 1513/4850 [07:12<15:55,  3.49it/s]

Epoch: 2, Loss: 2.971576690673828


Processing epoch 01:  31%|███       | 1514/4850 [07:13<15:48,  3.52it/s]

Epoch: 2, Loss: 3.0451602935791016


Processing epoch 01:  31%|███       | 1515/4850 [07:13<15:45,  3.53it/s]

Epoch: 2, Loss: 2.621082305908203


Processing epoch 01:  31%|███▏      | 1516/4850 [07:13<15:46,  3.52it/s]

Epoch: 2, Loss: 2.097722053527832


Processing epoch 01:  31%|███▏      | 1517/4850 [07:13<15:42,  3.53it/s]

Epoch: 2, Loss: 2.548004388809204


Processing epoch 01:  31%|███▏      | 1518/4850 [07:14<15:40,  3.54it/s]

Epoch: 2, Loss: 2.866621255874634


Processing epoch 01:  31%|███▏      | 1519/4850 [07:14<15:37,  3.55it/s]

Epoch: 2, Loss: 3.18115234375


Processing epoch 01:  31%|███▏      | 1520/4850 [07:14<15:40,  3.54it/s]

Epoch: 2, Loss: 2.4908156394958496


Processing epoch 01:  31%|███▏      | 1521/4850 [07:15<15:38,  3.55it/s]

Epoch: 2, Loss: 3.025768756866455


Processing epoch 01:  31%|███▏      | 1522/4850 [07:15<15:36,  3.55it/s]

Epoch: 2, Loss: 2.818908214569092


Processing epoch 01:  31%|███▏      | 1523/4850 [07:15<15:35,  3.56it/s]

Epoch: 2, Loss: 2.587352991104126


Processing epoch 01:  31%|███▏      | 1524/4850 [07:15<15:32,  3.57it/s]

Epoch: 2, Loss: 2.7267136573791504


Processing epoch 01:  31%|███▏      | 1525/4850 [07:16<15:33,  3.56it/s]

Epoch: 2, Loss: 2.5378737449645996


Processing epoch 01:  31%|███▏      | 1526/4850 [07:16<15:33,  3.56it/s]

Epoch: 2, Loss: 2.9143588542938232


Processing epoch 01:  31%|███▏      | 1527/4850 [07:16<15:31,  3.57it/s]

Epoch: 2, Loss: 2.467836380004883


Processing epoch 01:  32%|███▏      | 1528/4850 [07:17<15:37,  3.54it/s]

Epoch: 2, Loss: 2.1753053665161133


Processing epoch 01:  32%|███▏      | 1529/4850 [07:17<15:42,  3.52it/s]

Epoch: 2, Loss: 2.602787971496582


Processing epoch 01:  32%|███▏      | 1530/4850 [07:17<15:42,  3.52it/s]

Epoch: 2, Loss: 2.339158058166504


Processing epoch 01:  32%|███▏      | 1531/4850 [07:17<15:39,  3.53it/s]

Epoch: 2, Loss: 2.744457483291626


Processing epoch 01:  32%|███▏      | 1532/4850 [07:18<15:40,  3.53it/s]

Epoch: 2, Loss: 2.8197178840637207


Processing epoch 01:  32%|███▏      | 1533/4850 [07:18<15:40,  3.53it/s]

Epoch: 2, Loss: 2.7945661544799805


Processing epoch 01:  32%|███▏      | 1534/4850 [07:18<15:41,  3.52it/s]

Epoch: 2, Loss: 2.7353014945983887


Processing epoch 01:  32%|███▏      | 1535/4850 [07:19<15:51,  3.48it/s]

Epoch: 2, Loss: 2.6468124389648438


Processing epoch 01:  32%|███▏      | 1536/4850 [07:19<15:45,  3.50it/s]

Epoch: 2, Loss: 3.067440986633301


Processing epoch 01:  32%|███▏      | 1537/4850 [07:19<15:40,  3.52it/s]

Epoch: 2, Loss: 2.373969078063965


Processing epoch 01:  32%|███▏      | 1538/4850 [07:19<15:33,  3.55it/s]

Epoch: 2, Loss: 2.8867335319519043


Processing epoch 01:  32%|███▏      | 1539/4850 [07:20<15:31,  3.55it/s]

Epoch: 2, Loss: 2.689133644104004


Processing epoch 01:  32%|███▏      | 1540/4850 [07:20<15:33,  3.55it/s]

Epoch: 2, Loss: 2.488129138946533


Processing epoch 01:  32%|███▏      | 1541/4850 [07:20<15:35,  3.54it/s]

Epoch: 2, Loss: 2.4195384979248047


Processing epoch 01:  32%|███▏      | 1542/4850 [07:21<15:34,  3.54it/s]

Epoch: 2, Loss: 2.3497204780578613


Processing epoch 01:  32%|███▏      | 1543/4850 [07:21<15:36,  3.53it/s]

Epoch: 2, Loss: 2.4384865760803223


Processing epoch 01:  32%|███▏      | 1544/4850 [07:21<15:33,  3.54it/s]

Epoch: 2, Loss: 2.3972904682159424


Processing epoch 01:  32%|███▏      | 1545/4850 [07:21<15:52,  3.47it/s]

Epoch: 2, Loss: 3.2533912658691406


Processing epoch 01:  32%|███▏      | 1546/4850 [07:22<15:58,  3.45it/s]

Epoch: 2, Loss: 2.030942440032959


Processing epoch 01:  32%|███▏      | 1547/4850 [07:22<16:15,  3.39it/s]

Epoch: 2, Loss: 2.28779935836792


Processing epoch 01:  32%|███▏      | 1548/4850 [07:22<16:12,  3.39it/s]

Epoch: 2, Loss: 2.7768523693084717


Processing epoch 01:  32%|███▏      | 1549/4850 [07:23<15:59,  3.44it/s]

Epoch: 2, Loss: 2.5596261024475098


Processing epoch 01:  32%|███▏      | 1550/4850 [07:23<15:51,  3.47it/s]

Epoch: 2, Loss: 4.266836166381836


Processing epoch 01:  32%|███▏      | 1551/4850 [07:23<15:56,  3.45it/s]

Epoch: 2, Loss: 2.7567026615142822


Processing epoch 01:  32%|███▏      | 1552/4850 [07:23<15:50,  3.47it/s]

Epoch: 2, Loss: 2.9373717308044434


Processing epoch 01:  32%|███▏      | 1553/4850 [07:24<15:46,  3.48it/s]

Epoch: 2, Loss: 2.7285573482513428


Processing epoch 01:  32%|███▏      | 1554/4850 [07:24<15:54,  3.45it/s]

Epoch: 2, Loss: 2.860236644744873


Processing epoch 01:  32%|███▏      | 1555/4850 [07:24<15:55,  3.45it/s]

Epoch: 2, Loss: 2.6910386085510254


Processing epoch 01:  32%|███▏      | 1556/4850 [07:25<16:12,  3.39it/s]

Epoch: 2, Loss: 2.2725698947906494


Processing epoch 01:  32%|███▏      | 1557/4850 [07:25<16:28,  3.33it/s]

Epoch: 2, Loss: 3.872617721557617


Processing epoch 01:  32%|███▏      | 1558/4850 [07:25<16:11,  3.39it/s]

Epoch: 2, Loss: 2.5445139408111572


Processing epoch 01:  32%|███▏      | 1559/4850 [07:25<16:00,  3.43it/s]

Epoch: 2, Loss: 2.8546056747436523


Processing epoch 01:  32%|███▏      | 1560/4850 [07:26<15:51,  3.46it/s]

Epoch: 2, Loss: 2.3080992698669434


Processing epoch 01:  32%|███▏      | 1561/4850 [07:26<15:42,  3.49it/s]

Epoch: 2, Loss: 2.470864772796631


Processing epoch 01:  32%|███▏      | 1562/4850 [07:26<15:37,  3.51it/s]

Epoch: 2, Loss: 2.238414764404297


Processing epoch 01:  32%|███▏      | 1563/4850 [07:27<15:37,  3.51it/s]

Epoch: 2, Loss: 2.7242095470428467


Processing epoch 01:  32%|███▏      | 1564/4850 [07:27<15:40,  3.49it/s]

Epoch: 2, Loss: 2.685312271118164


Processing epoch 01:  32%|███▏      | 1565/4850 [07:27<15:39,  3.50it/s]

Epoch: 2, Loss: 2.6196346282958984


Processing epoch 01:  32%|███▏      | 1566/4850 [07:27<15:33,  3.52it/s]

Epoch: 2, Loss: 2.4173853397369385


Processing epoch 01:  32%|███▏      | 1567/4850 [07:28<15:32,  3.52it/s]

Epoch: 2, Loss: 3.021667957305908


Processing epoch 01:  32%|███▏      | 1568/4850 [07:28<15:46,  3.47it/s]

Epoch: 2, Loss: 2.0716686248779297


Processing epoch 01:  32%|███▏      | 1569/4850 [07:28<15:40,  3.49it/s]

Epoch: 2, Loss: 2.909234046936035


Processing epoch 01:  32%|███▏      | 1570/4850 [07:29<15:33,  3.51it/s]

Epoch: 2, Loss: 2.0635769367218018


Processing epoch 01:  32%|███▏      | 1571/4850 [07:29<15:33,  3.51it/s]

Epoch: 2, Loss: 2.8859996795654297


Processing epoch 01:  32%|███▏      | 1572/4850 [07:29<15:33,  3.51it/s]

Epoch: 2, Loss: 2.1272661685943604


Processing epoch 01:  32%|███▏      | 1573/4850 [07:29<15:29,  3.52it/s]

Epoch: 2, Loss: 2.6853690147399902


Processing epoch 01:  32%|███▏      | 1574/4850 [07:30<15:35,  3.50it/s]

Epoch: 2, Loss: 2.190683364868164


Processing epoch 01:  32%|███▏      | 1575/4850 [07:30<15:45,  3.46it/s]

Epoch: 2, Loss: 2.8575239181518555


Processing epoch 01:  32%|███▏      | 1576/4850 [07:30<15:39,  3.48it/s]

Epoch: 2, Loss: 2.4541516304016113


Processing epoch 01:  33%|███▎      | 1577/4850 [07:31<15:39,  3.48it/s]

Epoch: 2, Loss: 2.1542389392852783


Processing epoch 01:  33%|███▎      | 1578/4850 [07:31<15:33,  3.51it/s]

Epoch: 2, Loss: 2.9919471740722656


Processing epoch 01:  33%|███▎      | 1579/4850 [07:31<15:35,  3.50it/s]

Epoch: 2, Loss: 2.433835029602051


Processing epoch 01:  33%|███▎      | 1580/4850 [07:31<15:31,  3.51it/s]

Epoch: 2, Loss: 2.595249891281128


Processing epoch 01:  33%|███▎      | 1581/4850 [07:32<15:30,  3.51it/s]

Epoch: 2, Loss: 2.533870220184326


Processing epoch 01:  33%|███▎      | 1582/4850 [07:32<15:28,  3.52it/s]

Epoch: 2, Loss: 2.53151798248291


Processing epoch 01:  33%|███▎      | 1583/4850 [07:32<15:25,  3.53it/s]

Epoch: 2, Loss: 2.37949800491333


Processing epoch 01:  33%|███▎      | 1584/4850 [07:33<15:26,  3.53it/s]

Epoch: 2, Loss: 2.411186456680298


Processing epoch 01:  33%|███▎      | 1585/4850 [07:33<15:27,  3.52it/s]

Epoch: 2, Loss: 2.9433369636535645


Processing epoch 01:  33%|███▎      | 1586/4850 [07:33<15:40,  3.47it/s]

Epoch: 2, Loss: 2.723353862762451


Processing epoch 01:  33%|███▎      | 1587/4850 [07:33<15:32,  3.50it/s]

Epoch: 2, Loss: 2.4422805309295654


Processing epoch 01:  33%|███▎      | 1588/4850 [07:34<15:30,  3.51it/s]

Epoch: 2, Loss: 3.002152442932129


Processing epoch 01:  33%|███▎      | 1589/4850 [07:34<15:31,  3.50it/s]

Epoch: 2, Loss: 2.7071428298950195


Processing epoch 01:  33%|███▎      | 1590/4850 [07:34<15:32,  3.50it/s]

Epoch: 2, Loss: 2.5898289680480957


Processing epoch 01:  33%|███▎      | 1591/4850 [07:35<15:25,  3.52it/s]

Epoch: 2, Loss: 3.1600348949432373


Processing epoch 01:  33%|███▎      | 1592/4850 [07:35<15:27,  3.51it/s]

Epoch: 2, Loss: 2.4139997959136963


Processing epoch 01:  33%|███▎      | 1593/4850 [07:35<15:38,  3.47it/s]

Epoch: 2, Loss: 2.8002214431762695


Processing epoch 01:  33%|███▎      | 1594/4850 [07:35<15:33,  3.49it/s]

Epoch: 2, Loss: 2.5990068912506104


Processing epoch 01:  33%|███▎      | 1595/4850 [07:36<15:34,  3.48it/s]

Epoch: 2, Loss: 2.1537201404571533


Processing epoch 01:  33%|███▎      | 1596/4850 [07:36<15:35,  3.48it/s]

Epoch: 2, Loss: 2.881455898284912


Processing epoch 01:  33%|███▎      | 1597/4850 [07:36<15:45,  3.44it/s]

Epoch: 2, Loss: 3.265592098236084


Processing epoch 01:  33%|███▎      | 1598/4850 [07:37<15:38,  3.47it/s]

Epoch: 2, Loss: 1.9451336860656738


Processing epoch 01:  33%|███▎      | 1599/4850 [07:37<15:32,  3.49it/s]

Epoch: 2, Loss: 2.4774999618530273


Processing epoch 01:  33%|███▎      | 1600/4850 [07:37<15:30,  3.49it/s]

Epoch: 2, Loss: 2.680826425552368


Processing epoch 01:  33%|███▎      | 1601/4850 [07:37<15:45,  3.44it/s]

Epoch: 2, Loss: 2.269940137863159


Processing epoch 01:  33%|███▎      | 1602/4850 [07:38<15:46,  3.43it/s]

Epoch: 2, Loss: 2.918613910675049


Processing epoch 01:  33%|███▎      | 1603/4850 [07:38<15:57,  3.39it/s]

Epoch: 2, Loss: 2.154078483581543


Processing epoch 01:  33%|███▎      | 1604/4850 [07:38<15:57,  3.39it/s]

Epoch: 2, Loss: 3.077349901199341


Processing epoch 01:  33%|███▎      | 1605/4850 [07:39<15:53,  3.40it/s]

Epoch: 2, Loss: 1.9636831283569336


Processing epoch 01:  33%|███▎      | 1606/4850 [07:39<15:55,  3.40it/s]

Epoch: 2, Loss: 2.510195016860962


Processing epoch 01:  33%|███▎      | 1607/4850 [07:39<15:57,  3.39it/s]

Epoch: 2, Loss: 3.266254425048828


Processing epoch 01:  33%|███▎      | 1608/4850 [07:40<15:57,  3.39it/s]

Epoch: 2, Loss: 2.547550678253174


Processing epoch 01:  33%|███▎      | 1609/4850 [07:40<15:44,  3.43it/s]

Epoch: 2, Loss: 2.2902355194091797


Processing epoch 01:  33%|███▎      | 1610/4850 [07:40<15:37,  3.46it/s]

Epoch: 2, Loss: 3.2201476097106934


Processing epoch 01:  33%|███▎      | 1611/4850 [07:40<15:31,  3.48it/s]

Epoch: 2, Loss: 2.6459126472473145


Processing epoch 01:  33%|███▎      | 1612/4850 [07:41<15:27,  3.49it/s]

Epoch: 2, Loss: 2.7213869094848633


Processing epoch 01:  33%|███▎      | 1613/4850 [07:41<15:23,  3.50it/s]

Epoch: 2, Loss: 2.7408180236816406


Processing epoch 01:  33%|███▎      | 1614/4850 [07:41<15:26,  3.49it/s]

Epoch: 2, Loss: 2.7678794860839844


Processing epoch 01:  33%|███▎      | 1615/4850 [07:42<15:41,  3.43it/s]

Epoch: 2, Loss: 2.0049211978912354


Processing epoch 01:  33%|███▎      | 1616/4850 [07:42<15:43,  3.43it/s]

Epoch: 2, Loss: 2.2112443447113037


Processing epoch 01:  33%|███▎      | 1617/4850 [07:42<15:46,  3.42it/s]

Epoch: 2, Loss: 2.630373477935791


Processing epoch 01:  33%|███▎      | 1618/4850 [07:42<15:44,  3.42it/s]

Epoch: 2, Loss: 2.7865405082702637


Processing epoch 01:  33%|███▎      | 1619/4850 [07:43<15:56,  3.38it/s]

Epoch: 2, Loss: 2.4351751804351807


Processing epoch 01:  33%|███▎      | 1620/4850 [07:43<15:46,  3.41it/s]

Epoch: 2, Loss: 3.2446227073669434


Processing epoch 01:  33%|███▎      | 1621/4850 [07:43<15:58,  3.37it/s]

Epoch: 2, Loss: 2.8186235427856445


Processing epoch 01:  33%|███▎      | 1622/4850 [07:44<16:06,  3.34it/s]

Epoch: 2, Loss: 2.2656569480895996


Processing epoch 01:  33%|███▎      | 1623/4850 [07:44<15:51,  3.39it/s]

Epoch: 2, Loss: 3.440096616744995


Processing epoch 01:  33%|███▎      | 1624/4850 [07:44<15:56,  3.37it/s]

Epoch: 2, Loss: 2.559185266494751


Processing epoch 01:  34%|███▎      | 1625/4850 [07:45<15:56,  3.37it/s]

Epoch: 2, Loss: 2.5239663124084473


Processing epoch 01:  34%|███▎      | 1626/4850 [07:45<15:54,  3.38it/s]

Epoch: 2, Loss: 2.189157009124756


Processing epoch 01:  34%|███▎      | 1627/4850 [07:45<15:57,  3.37it/s]

Epoch: 2, Loss: 2.352609634399414


Processing epoch 01:  34%|███▎      | 1628/4850 [07:45<16:04,  3.34it/s]

Epoch: 2, Loss: 2.5978658199310303


Processing epoch 01:  34%|███▎      | 1629/4850 [07:46<15:54,  3.37it/s]

Epoch: 2, Loss: 2.3654775619506836


Processing epoch 01:  34%|███▎      | 1630/4850 [07:46<15:44,  3.41it/s]

Epoch: 2, Loss: 2.248641014099121


Processing epoch 01:  34%|███▎      | 1631/4850 [07:46<15:32,  3.45it/s]

Epoch: 2, Loss: 3.041996479034424


Processing epoch 01:  34%|███▎      | 1632/4850 [07:47<15:28,  3.47it/s]

Epoch: 2, Loss: 2.643432855606079


Processing epoch 01:  34%|███▎      | 1633/4850 [07:47<15:21,  3.49it/s]

Epoch: 2, Loss: 2.282818078994751


Processing epoch 01:  34%|███▎      | 1634/4850 [07:47<15:16,  3.51it/s]

Epoch: 2, Loss: 2.74623966217041


Processing epoch 01:  34%|███▎      | 1635/4850 [07:47<15:12,  3.52it/s]

Epoch: 2, Loss: 2.5212361812591553


Processing epoch 01:  34%|███▎      | 1636/4850 [07:48<15:10,  3.53it/s]

Epoch: 2, Loss: 2.556368589401245


Processing epoch 01:  34%|███▍      | 1637/4850 [07:48<15:21,  3.49it/s]

Epoch: 2, Loss: 2.4644699096679688


Processing epoch 01:  34%|███▍      | 1638/4850 [07:48<15:21,  3.49it/s]

Epoch: 2, Loss: 2.89560604095459


Processing epoch 01:  34%|███▍      | 1639/4850 [07:49<15:18,  3.50it/s]

Epoch: 2, Loss: 2.942676067352295


Processing epoch 01:  34%|███▍      | 1640/4850 [07:49<15:12,  3.52it/s]

Epoch: 2, Loss: 4.408877372741699


Processing epoch 01:  34%|███▍      | 1641/4850 [07:49<15:14,  3.51it/s]

Epoch: 2, Loss: 2.4883599281311035


Processing epoch 01:  34%|███▍      | 1642/4850 [07:49<15:12,  3.52it/s]

Epoch: 2, Loss: 2.7274441719055176


Processing epoch 01:  34%|███▍      | 1643/4850 [07:50<15:22,  3.48it/s]

Epoch: 2, Loss: 2.2076406478881836


Processing epoch 01:  34%|███▍      | 1644/4850 [07:50<15:26,  3.46it/s]

Epoch: 2, Loss: 3.256852626800537


Processing epoch 01:  34%|███▍      | 1645/4850 [07:50<15:54,  3.36it/s]

Epoch: 2, Loss: 2.6298444271087646


Processing epoch 01:  34%|███▍      | 1646/4850 [07:51<15:44,  3.39it/s]

Epoch: 2, Loss: 2.5633535385131836


Processing epoch 01:  34%|███▍      | 1647/4850 [07:51<15:39,  3.41it/s]

Epoch: 2, Loss: 2.5861282348632812


Processing epoch 01:  34%|███▍      | 1648/4850 [07:51<15:46,  3.38it/s]

Epoch: 2, Loss: 2.5611047744750977


Processing epoch 01:  34%|███▍      | 1649/4850 [07:52<16:04,  3.32it/s]

Epoch: 2, Loss: 2.2674996852874756


Processing epoch 01:  34%|███▍      | 1650/4850 [07:52<16:03,  3.32it/s]

Epoch: 2, Loss: 2.665811061859131


Processing epoch 01:  34%|███▍      | 1651/4850 [07:52<16:02,  3.32it/s]

Epoch: 2, Loss: 2.4770865440368652


Processing epoch 01:  34%|███▍      | 1652/4850 [07:52<15:57,  3.34it/s]

Epoch: 2, Loss: 2.4799857139587402


Processing epoch 01:  34%|███▍      | 1653/4850 [07:53<15:52,  3.36it/s]

Epoch: 2, Loss: 2.5954537391662598


Processing epoch 01:  34%|███▍      | 1654/4850 [07:53<15:59,  3.33it/s]

Epoch: 2, Loss: 3.0677199363708496


Processing epoch 01:  34%|███▍      | 1655/4850 [07:53<16:03,  3.32it/s]

Epoch: 2, Loss: 3.193603515625


Processing epoch 01:  34%|███▍      | 1656/4850 [07:54<15:59,  3.33it/s]

Epoch: 2, Loss: 3.0917415618896484


Processing epoch 01:  34%|███▍      | 1657/4850 [07:54<15:41,  3.39it/s]

Epoch: 2, Loss: 2.3181707859039307


Processing epoch 01:  34%|███▍      | 1658/4850 [07:54<15:28,  3.44it/s]

Epoch: 2, Loss: 2.3282644748687744


Processing epoch 01:  34%|███▍      | 1659/4850 [07:54<15:23,  3.46it/s]

Epoch: 2, Loss: 2.020017623901367


Processing epoch 01:  34%|███▍      | 1660/4850 [07:55<15:18,  3.47it/s]

Epoch: 2, Loss: 2.6993680000305176


Processing epoch 01:  34%|███▍      | 1661/4850 [07:55<15:13,  3.49it/s]

Epoch: 2, Loss: 2.7226524353027344


Processing epoch 01:  34%|███▍      | 1662/4850 [07:55<15:07,  3.51it/s]

Epoch: 2, Loss: 2.3318228721618652


Processing epoch 01:  34%|███▍      | 1663/4850 [07:56<15:10,  3.50it/s]

Epoch: 2, Loss: 2.9316859245300293


Processing epoch 01:  34%|███▍      | 1664/4850 [07:56<15:05,  3.52it/s]

Epoch: 2, Loss: 2.8666157722473145


Processing epoch 01:  34%|███▍      | 1665/4850 [07:56<15:02,  3.53it/s]

Epoch: 2, Loss: 2.4150848388671875


Processing epoch 01:  34%|███▍      | 1666/4850 [07:56<15:06,  3.51it/s]

Epoch: 2, Loss: 2.7962656021118164


Processing epoch 01:  34%|███▍      | 1667/4850 [07:57<15:04,  3.52it/s]

Epoch: 2, Loss: 2.851370334625244


Processing epoch 01:  34%|███▍      | 1668/4850 [07:57<15:05,  3.51it/s]

Epoch: 2, Loss: 2.5073368549346924


Processing epoch 01:  34%|███▍      | 1669/4850 [07:57<15:03,  3.52it/s]

Epoch: 2, Loss: 2.493607997894287


Processing epoch 01:  34%|███▍      | 1670/4850 [07:58<15:05,  3.51it/s]

Epoch: 2, Loss: 2.729766607284546


Processing epoch 01:  34%|███▍      | 1671/4850 [07:58<15:01,  3.53it/s]

Epoch: 2, Loss: 2.463329315185547


Processing epoch 01:  34%|███▍      | 1672/4850 [07:58<14:55,  3.55it/s]

Epoch: 2, Loss: 3.3218674659729004


Processing epoch 01:  34%|███▍      | 1673/4850 [07:58<14:54,  3.55it/s]

Epoch: 2, Loss: 2.722377061843872


Processing epoch 01:  35%|███▍      | 1674/4850 [07:59<14:56,  3.54it/s]

Epoch: 2, Loss: 2.5612919330596924


Processing epoch 01:  35%|███▍      | 1675/4850 [07:59<15:00,  3.53it/s]

Epoch: 2, Loss: 2.3727712631225586


Processing epoch 01:  35%|███▍      | 1676/4850 [07:59<14:57,  3.54it/s]

Epoch: 2, Loss: 2.6769936084747314


Processing epoch 01:  35%|███▍      | 1677/4850 [08:00<15:06,  3.50it/s]

Epoch: 2, Loss: 2.4901680946350098


Processing epoch 01:  35%|███▍      | 1678/4850 [08:00<15:02,  3.52it/s]

Epoch: 2, Loss: 2.4682841300964355


Processing epoch 01:  35%|███▍      | 1679/4850 [08:00<14:59,  3.52it/s]

Epoch: 2, Loss: 3.026935577392578


Processing epoch 01:  35%|███▍      | 1680/4850 [08:00<14:56,  3.53it/s]

Epoch: 2, Loss: 2.0596132278442383


Processing epoch 01:  35%|███▍      | 1681/4850 [08:01<14:57,  3.53it/s]

Epoch: 2, Loss: 2.27819561958313


Processing epoch 01:  35%|███▍      | 1682/4850 [08:01<14:57,  3.53it/s]

Epoch: 2, Loss: 2.250843048095703


Processing epoch 01:  35%|███▍      | 1683/4850 [08:01<14:58,  3.52it/s]

Epoch: 2, Loss: 2.729947090148926


Processing epoch 01:  35%|███▍      | 1684/4850 [08:02<15:07,  3.49it/s]

Epoch: 2, Loss: 2.627393960952759


Processing epoch 01:  35%|███▍      | 1685/4850 [08:02<15:03,  3.50it/s]

Epoch: 2, Loss: 2.200655698776245


Processing epoch 01:  35%|███▍      | 1686/4850 [08:02<14:59,  3.52it/s]

Epoch: 2, Loss: 2.143204689025879


Processing epoch 01:  35%|███▍      | 1687/4850 [08:02<14:53,  3.54it/s]

Epoch: 2, Loss: 2.989753484725952


Processing epoch 01:  35%|███▍      | 1688/4850 [08:03<14:50,  3.55it/s]

Epoch: 2, Loss: 2.6504275798797607


Processing epoch 01:  35%|███▍      | 1689/4850 [08:03<14:49,  3.55it/s]

Epoch: 2, Loss: 2.3878684043884277


Processing epoch 01:  35%|███▍      | 1690/4850 [08:03<14:55,  3.53it/s]

Epoch: 2, Loss: 2.4790353775024414


Processing epoch 01:  35%|███▍      | 1691/4850 [08:04<14:54,  3.53it/s]

Epoch: 2, Loss: 2.2658979892730713


Processing epoch 01:  35%|███▍      | 1692/4850 [08:04<14:59,  3.51it/s]

Epoch: 2, Loss: 2.1603243350982666


Processing epoch 01:  35%|███▍      | 1693/4850 [08:04<15:15,  3.45it/s]

Epoch: 2, Loss: 2.58316707611084


Processing epoch 01:  35%|███▍      | 1694/4850 [08:04<15:42,  3.35it/s]

Epoch: 2, Loss: 2.6882076263427734


Processing epoch 01:  35%|███▍      | 1695/4850 [08:05<15:36,  3.37it/s]

Epoch: 2, Loss: 2.310943126678467


Processing epoch 01:  35%|███▍      | 1696/4850 [08:05<15:33,  3.38it/s]

Epoch: 2, Loss: 2.700160264968872


Processing epoch 01:  35%|███▍      | 1697/4850 [08:05<15:41,  3.35it/s]

Epoch: 2, Loss: 2.7341127395629883


Processing epoch 01:  35%|███▌      | 1698/4850 [08:06<15:32,  3.38it/s]

Epoch: 2, Loss: 2.221182346343994


Processing epoch 01:  35%|███▌      | 1699/4850 [08:06<15:30,  3.39it/s]

Epoch: 2, Loss: 2.0725324153900146


Processing epoch 01:  35%|███▌      | 1700/4850 [08:06<15:41,  3.35it/s]

Epoch: 2, Loss: 2.4562394618988037


Processing epoch 01:  35%|███▌      | 1701/4850 [08:07<15:39,  3.35it/s]

Epoch: 2, Loss: 3.208723783493042


Processing epoch 01:  35%|███▌      | 1702/4850 [08:07<15:47,  3.32it/s]

Epoch: 2, Loss: 2.610290765762329


Processing epoch 01:  35%|███▌      | 1703/4850 [08:07<15:32,  3.38it/s]

Epoch: 2, Loss: 2.494016170501709


Processing epoch 01:  35%|███▌      | 1704/4850 [08:07<15:20,  3.42it/s]

Epoch: 2, Loss: 2.8728439807891846


Processing epoch 01:  35%|███▌      | 1705/4850 [08:08<15:31,  3.38it/s]

Epoch: 2, Loss: 2.7092862129211426


Processing epoch 01:  35%|███▌      | 1706/4850 [08:08<15:25,  3.40it/s]

Epoch: 2, Loss: 2.2102999687194824


Processing epoch 01:  35%|███▌      | 1707/4850 [08:08<15:12,  3.44it/s]

Epoch: 2, Loss: 2.724820137023926


Processing epoch 01:  35%|███▌      | 1708/4850 [08:09<15:05,  3.47it/s]

Epoch: 2, Loss: 2.396888256072998


Processing epoch 01:  35%|███▌      | 1709/4850 [08:09<15:00,  3.49it/s]

Epoch: 2, Loss: 2.552290439605713


Processing epoch 01:  35%|███▌      | 1710/4850 [08:09<14:56,  3.50it/s]

Epoch: 2, Loss: 2.530977249145508


Processing epoch 01:  35%|███▌      | 1711/4850 [08:09<14:52,  3.52it/s]

Epoch: 2, Loss: 2.4846389293670654


Processing epoch 01:  35%|███▌      | 1712/4850 [08:10<14:47,  3.54it/s]

Epoch: 2, Loss: 2.9061601161956787


Processing epoch 01:  35%|███▌      | 1713/4850 [08:10<15:07,  3.46it/s]

Epoch: 2, Loss: 2.2065985202789307


Processing epoch 01:  35%|███▌      | 1714/4850 [08:10<14:59,  3.48it/s]

Epoch: 2, Loss: 2.184418201446533


Processing epoch 01:  35%|███▌      | 1715/4850 [08:11<14:53,  3.51it/s]

Epoch: 2, Loss: 2.725463390350342


Processing epoch 01:  35%|███▌      | 1716/4850 [08:11<14:47,  3.53it/s]

Epoch: 2, Loss: 2.670901298522949


Processing epoch 01:  35%|███▌      | 1717/4850 [08:11<14:44,  3.54it/s]

Epoch: 2, Loss: 3.4810495376586914


Processing epoch 01:  35%|███▌      | 1718/4850 [08:11<14:43,  3.54it/s]

Epoch: 2, Loss: 2.3551414012908936


Processing epoch 01:  35%|███▌      | 1719/4850 [08:12<14:45,  3.54it/s]

Epoch: 2, Loss: 2.593903064727783


Processing epoch 01:  35%|███▌      | 1720/4850 [08:12<14:48,  3.52it/s]

Epoch: 2, Loss: 2.6881155967712402


Processing epoch 01:  35%|███▌      | 1721/4850 [08:12<14:46,  3.53it/s]

Epoch: 2, Loss: 2.6047141551971436


Processing epoch 01:  36%|███▌      | 1722/4850 [08:13<14:48,  3.52it/s]

Epoch: 2, Loss: 3.148068904876709


Processing epoch 01:  36%|███▌      | 1723/4850 [08:13<14:51,  3.51it/s]

Epoch: 2, Loss: 2.9790291786193848


Processing epoch 01:  36%|███▌      | 1724/4850 [08:13<15:02,  3.46it/s]

Epoch: 2, Loss: 2.3180410861968994


Processing epoch 01:  36%|███▌      | 1725/4850 [08:13<14:58,  3.48it/s]

Epoch: 2, Loss: 2.1031229496002197


Processing epoch 01:  36%|███▌      | 1726/4850 [08:14<14:48,  3.52it/s]

Epoch: 2, Loss: 3.060877561569214


Processing epoch 01:  36%|███▌      | 1727/4850 [08:14<14:46,  3.52it/s]

Epoch: 2, Loss: 2.7203574180603027


Processing epoch 01:  36%|███▌      | 1728/4850 [08:14<14:46,  3.52it/s]

Epoch: 2, Loss: 2.683084487915039


Processing epoch 01:  36%|███▌      | 1729/4850 [08:15<14:43,  3.53it/s]

Epoch: 2, Loss: 2.7770252227783203


Processing epoch 01:  36%|███▌      | 1730/4850 [08:15<14:51,  3.50it/s]

Epoch: 2, Loss: 2.2724475860595703


Processing epoch 01:  36%|███▌      | 1731/4850 [08:15<14:45,  3.52it/s]

Epoch: 2, Loss: 3.104214668273926


Processing epoch 01:  36%|███▌      | 1732/4850 [08:15<14:41,  3.54it/s]

Epoch: 2, Loss: 2.9457154273986816


Processing epoch 01:  36%|███▌      | 1733/4850 [08:16<14:40,  3.54it/s]

Epoch: 2, Loss: 2.363694667816162


Processing epoch 01:  36%|███▌      | 1734/4850 [08:16<14:37,  3.55it/s]

Epoch: 2, Loss: 2.938082456588745


Processing epoch 01:  36%|███▌      | 1735/4850 [08:16<14:45,  3.52it/s]

Epoch: 2, Loss: 2.859436511993408


Processing epoch 01:  36%|███▌      | 1736/4850 [08:17<14:39,  3.54it/s]

Epoch: 2, Loss: 2.7835657596588135


Processing epoch 01:  36%|███▌      | 1737/4850 [08:17<14:39,  3.54it/s]

Epoch: 2, Loss: 3.127717971801758


Processing epoch 01:  36%|███▌      | 1738/4850 [08:17<14:45,  3.52it/s]

Epoch: 2, Loss: 2.7949140071868896


Processing epoch 01:  36%|███▌      | 1739/4850 [08:17<14:44,  3.52it/s]

Epoch: 2, Loss: 2.3534319400787354


Processing epoch 01:  36%|███▌      | 1740/4850 [08:18<14:41,  3.53it/s]

Epoch: 2, Loss: 2.997019052505493


Processing epoch 01:  36%|███▌      | 1741/4850 [08:18<14:43,  3.52it/s]

Epoch: 2, Loss: 2.1034340858459473


Processing epoch 01:  36%|███▌      | 1742/4850 [08:18<14:55,  3.47it/s]

Epoch: 2, Loss: 3.4767003059387207


Processing epoch 01:  36%|███▌      | 1743/4850 [08:19<14:54,  3.47it/s]

Epoch: 2, Loss: 2.414336681365967


Processing epoch 01:  36%|███▌      | 1744/4850 [08:19<14:55,  3.47it/s]

Epoch: 2, Loss: 2.6636385917663574


Processing epoch 01:  36%|███▌      | 1745/4850 [08:19<15:03,  3.44it/s]

Epoch: 2, Loss: 2.695019245147705


Processing epoch 01:  36%|███▌      | 1746/4850 [08:19<15:11,  3.41it/s]

Epoch: 2, Loss: 2.3465147018432617


Processing epoch 01:  36%|███▌      | 1747/4850 [08:20<15:33,  3.32it/s]

Epoch: 2, Loss: 2.6960840225219727


Processing epoch 01:  36%|███▌      | 1748/4850 [08:20<15:30,  3.33it/s]

Epoch: 2, Loss: 2.35542631149292


Processing epoch 01:  36%|███▌      | 1749/4850 [08:20<15:15,  3.39it/s]

Epoch: 2, Loss: 2.63269305229187


Processing epoch 01:  36%|███▌      | 1750/4850 [08:21<15:20,  3.37it/s]

Epoch: 2, Loss: 2.5713624954223633


Processing epoch 01:  36%|███▌      | 1751/4850 [08:21<15:26,  3.34it/s]

Epoch: 2, Loss: 2.630868434906006


Processing epoch 01:  36%|███▌      | 1752/4850 [08:21<15:30,  3.33it/s]

Epoch: 2, Loss: 2.644477605819702


Processing epoch 01:  36%|███▌      | 1753/4850 [08:22<15:37,  3.30it/s]

Epoch: 2, Loss: 2.662283420562744


Processing epoch 01:  36%|███▌      | 1754/4850 [08:22<15:39,  3.30it/s]

Epoch: 2, Loss: 2.6156656742095947


Processing epoch 01:  36%|███▌      | 1755/4850 [08:22<15:38,  3.30it/s]

Epoch: 2, Loss: 2.261261224746704


Processing epoch 01:  36%|███▌      | 1756/4850 [08:22<15:17,  3.37it/s]

Epoch: 2, Loss: 2.544708490371704


Processing epoch 01:  36%|███▌      | 1757/4850 [08:23<15:05,  3.42it/s]

Epoch: 2, Loss: 2.7826805114746094


Processing epoch 01:  36%|███▌      | 1758/4850 [08:23<14:55,  3.45it/s]

Epoch: 2, Loss: 3.419992685317993


Processing epoch 01:  36%|███▋      | 1759/4850 [08:23<14:50,  3.47it/s]

Epoch: 2, Loss: 2.6780996322631836


Processing epoch 01:  36%|███▋      | 1760/4850 [08:24<14:45,  3.49it/s]

Epoch: 2, Loss: 2.661860942840576


Processing epoch 01:  36%|███▋      | 1761/4850 [08:24<14:36,  3.53it/s]

Epoch: 2, Loss: 3.9399356842041016


Processing epoch 01:  36%|███▋      | 1762/4850 [08:24<14:39,  3.51it/s]

Epoch: 2, Loss: 2.6631219387054443


Processing epoch 01:  36%|███▋      | 1763/4850 [08:24<14:37,  3.52it/s]

Epoch: 2, Loss: 3.2930805683135986


Processing epoch 01:  36%|███▋      | 1764/4850 [08:25<14:51,  3.46it/s]

Epoch: 2, Loss: 2.6617140769958496


Processing epoch 01:  36%|███▋      | 1765/4850 [08:25<14:43,  3.49it/s]

Epoch: 2, Loss: 2.693134307861328


Processing epoch 01:  36%|███▋      | 1766/4850 [08:25<14:40,  3.50it/s]

Epoch: 2, Loss: 2.7350635528564453


Processing epoch 01:  36%|███▋      | 1767/4850 [08:26<14:35,  3.52it/s]

Epoch: 2, Loss: 2.6499133110046387


Processing epoch 01:  36%|███▋      | 1768/4850 [08:26<14:34,  3.53it/s]

Epoch: 2, Loss: 2.3027641773223877


Processing epoch 01:  36%|███▋      | 1769/4850 [08:26<14:30,  3.54it/s]

Epoch: 2, Loss: 2.836430549621582


Processing epoch 01:  36%|███▋      | 1770/4850 [08:26<14:31,  3.54it/s]

Epoch: 2, Loss: 2.72450590133667


Processing epoch 01:  37%|███▋      | 1771/4850 [08:27<14:29,  3.54it/s]

Epoch: 2, Loss: 2.324172258377075


Processing epoch 01:  37%|███▋      | 1772/4850 [08:27<14:35,  3.52it/s]

Epoch: 2, Loss: 2.2606897354125977


Processing epoch 01:  37%|███▋      | 1773/4850 [08:27<14:34,  3.52it/s]

Epoch: 2, Loss: 2.088510751724243


Processing epoch 01:  37%|███▋      | 1774/4850 [08:28<14:35,  3.51it/s]

Epoch: 2, Loss: 2.5380659103393555


Processing epoch 01:  37%|███▋      | 1775/4850 [08:28<14:40,  3.49it/s]

Epoch: 2, Loss: 2.4389097690582275


Processing epoch 01:  37%|███▋      | 1776/4850 [08:28<14:40,  3.49it/s]

Epoch: 2, Loss: 2.502890110015869


Processing epoch 01:  37%|███▋      | 1777/4850 [08:28<14:33,  3.52it/s]

Epoch: 2, Loss: 2.574357032775879


Processing epoch 01:  37%|███▋      | 1778/4850 [08:29<14:32,  3.52it/s]

Epoch: 2, Loss: 2.4320011138916016


Processing epoch 01:  37%|███▋      | 1779/4850 [08:29<14:34,  3.51it/s]

Epoch: 2, Loss: 2.161724805831909


Processing epoch 01:  37%|███▋      | 1780/4850 [08:29<14:33,  3.52it/s]

Epoch: 2, Loss: 2.9880471229553223


Processing epoch 01:  37%|███▋      | 1781/4850 [08:30<14:31,  3.52it/s]

Epoch: 2, Loss: 3.133457660675049


Processing epoch 01:  37%|███▋      | 1782/4850 [08:30<14:26,  3.54it/s]

Epoch: 2, Loss: 2.283228874206543


Processing epoch 01:  37%|███▋      | 1783/4850 [08:30<14:29,  3.53it/s]

Epoch: 2, Loss: 2.2834575176239014


Processing epoch 01:  37%|███▋      | 1784/4850 [08:30<14:30,  3.52it/s]

Epoch: 2, Loss: 2.6912405490875244


Processing epoch 01:  37%|███▋      | 1785/4850 [08:31<14:28,  3.53it/s]

Epoch: 2, Loss: 2.9174599647521973


Processing epoch 01:  37%|███▋      | 1786/4850 [08:31<14:35,  3.50it/s]

Epoch: 2, Loss: 2.669804573059082


Processing epoch 01:  37%|███▋      | 1787/4850 [08:31<14:28,  3.53it/s]

Epoch: 2, Loss: 2.7166171073913574


Processing epoch 01:  37%|███▋      | 1788/4850 [08:31<14:25,  3.54it/s]

Epoch: 2, Loss: 2.7941675186157227


Processing epoch 01:  37%|███▋      | 1789/4850 [08:32<14:25,  3.54it/s]

Epoch: 2, Loss: 2.2242679595947266


Processing epoch 01:  37%|███▋      | 1790/4850 [08:32<14:26,  3.53it/s]

Epoch: 2, Loss: 2.0503501892089844


Processing epoch 01:  37%|███▋      | 1791/4850 [08:32<14:27,  3.52it/s]

Epoch: 2, Loss: 2.558783531188965


Processing epoch 01:  37%|███▋      | 1792/4850 [08:33<14:42,  3.46it/s]

Epoch: 2, Loss: 3.2663486003875732


Processing epoch 01:  37%|███▋      | 1793/4850 [08:33<15:05,  3.37it/s]

Epoch: 2, Loss: 2.6698031425476074


Processing epoch 01:  37%|███▋      | 1794/4850 [08:33<15:00,  3.39it/s]

Epoch: 2, Loss: 2.4880285263061523


Processing epoch 01:  37%|███▋      | 1795/4850 [08:34<14:54,  3.42it/s]

Epoch: 2, Loss: 2.9193711280822754


Processing epoch 01:  37%|███▋      | 1796/4850 [08:34<14:48,  3.44it/s]

Epoch: 2, Loss: 2.9806339740753174


Processing epoch 01:  37%|███▋      | 1797/4850 [08:34<14:54,  3.41it/s]

Epoch: 2, Loss: 2.581533908843994


Processing epoch 01:  37%|███▋      | 1798/4850 [08:34<14:45,  3.45it/s]

Epoch: 2, Loss: 2.7433857917785645


Processing epoch 01:  37%|███▋      | 1799/4850 [08:35<14:52,  3.42it/s]

Epoch: 2, Loss: 2.848029136657715


Processing epoch 01:  37%|███▋      | 1800/4850 [08:35<14:47,  3.44it/s]

Epoch: 2, Loss: 2.627091407775879


Processing epoch 01:  37%|███▋      | 1801/4850 [08:35<15:02,  3.38it/s]

Epoch: 2, Loss: 2.2423324584960938


Processing epoch 01:  37%|███▋      | 1802/4850 [08:36<15:01,  3.38it/s]

Epoch: 2, Loss: 2.603424072265625


Processing epoch 01:  37%|███▋      | 1803/4850 [08:36<14:46,  3.44it/s]

Epoch: 2, Loss: 2.6566519737243652


Processing epoch 01:  37%|███▋      | 1804/4850 [08:36<14:42,  3.45it/s]

Epoch: 2, Loss: 2.3098456859588623


Processing epoch 01:  37%|███▋      | 1805/4850 [08:36<14:46,  3.43it/s]

Epoch: 2, Loss: 2.2305173873901367


Processing epoch 01:  37%|███▋      | 1806/4850 [08:37<15:06,  3.36it/s]

Epoch: 2, Loss: 2.440484046936035


Processing epoch 01:  37%|███▋      | 1807/4850 [08:37<14:50,  3.42it/s]

Epoch: 2, Loss: 2.852447032928467


Processing epoch 01:  37%|███▋      | 1808/4850 [08:37<14:43,  3.44it/s]

Epoch: 2, Loss: 2.3754677772521973


Processing epoch 01:  37%|███▋      | 1809/4850 [08:38<14:38,  3.46it/s]

Epoch: 2, Loss: 2.8339028358459473


Processing epoch 01:  37%|███▋      | 1810/4850 [08:38<14:34,  3.48it/s]

Epoch: 2, Loss: 2.3476953506469727


Processing epoch 01:  37%|███▋      | 1811/4850 [08:38<14:30,  3.49it/s]

Epoch: 2, Loss: 2.2599592208862305


Processing epoch 01:  37%|███▋      | 1812/4850 [08:38<14:29,  3.49it/s]

Epoch: 2, Loss: 3.558957099914551


Processing epoch 01:  37%|███▋      | 1813/4850 [08:39<14:25,  3.51it/s]

Epoch: 2, Loss: 2.547361373901367


Processing epoch 01:  37%|███▋      | 1814/4850 [08:39<14:18,  3.54it/s]

Epoch: 2, Loss: 3.6889467239379883


Processing epoch 01:  37%|███▋      | 1815/4850 [08:39<14:31,  3.48it/s]

Epoch: 2, Loss: 1.8794598579406738


Processing epoch 01:  37%|███▋      | 1816/4850 [08:40<14:22,  3.52it/s]

Epoch: 2, Loss: 2.9308407306671143


Processing epoch 01:  37%|███▋      | 1817/4850 [08:40<14:22,  3.52it/s]

Epoch: 2, Loss: 2.7507615089416504


Processing epoch 01:  37%|███▋      | 1818/4850 [08:40<14:22,  3.51it/s]

Epoch: 2, Loss: 2.5824637413024902


Processing epoch 01:  38%|███▊      | 1819/4850 [08:40<14:19,  3.53it/s]

Epoch: 2, Loss: 2.4949607849121094


Processing epoch 01:  38%|███▊      | 1820/4850 [08:41<14:20,  3.52it/s]

Epoch: 2, Loss: 3.111435651779175


Processing epoch 01:  38%|███▊      | 1821/4850 [08:41<14:17,  3.53it/s]

Epoch: 2, Loss: 2.379697799682617


Processing epoch 01:  38%|███▊      | 1822/4850 [08:41<14:20,  3.52it/s]

Epoch: 2, Loss: 2.741908073425293


Processing epoch 01:  38%|███▊      | 1823/4850 [08:42<14:20,  3.52it/s]

Epoch: 2, Loss: 2.5732905864715576


Processing epoch 01:  38%|███▊      | 1824/4850 [08:42<14:17,  3.53it/s]

Epoch: 2, Loss: 2.5429461002349854


Processing epoch 01:  38%|███▊      | 1825/4850 [08:42<14:17,  3.53it/s]

Epoch: 2, Loss: 2.630699872970581


Processing epoch 01:  38%|███▊      | 1826/4850 [08:42<14:29,  3.48it/s]

Epoch: 2, Loss: 2.7966620922088623


Processing epoch 01:  38%|███▊      | 1827/4850 [08:43<14:24,  3.50it/s]

Epoch: 2, Loss: 2.1983659267425537


Processing epoch 01:  38%|███▊      | 1828/4850 [08:43<14:19,  3.52it/s]

Epoch: 2, Loss: 2.5167555809020996


Processing epoch 01:  38%|███▊      | 1829/4850 [08:43<14:16,  3.53it/s]

Epoch: 2, Loss: 2.321216583251953


Processing epoch 01:  38%|███▊      | 1830/4850 [08:44<14:10,  3.55it/s]

Epoch: 2, Loss: 3.457746744155884


Processing epoch 01:  38%|███▊      | 1831/4850 [08:44<14:12,  3.54it/s]

Epoch: 2, Loss: 3.0892703533172607


Processing epoch 01:  38%|███▊      | 1832/4850 [08:44<14:10,  3.55it/s]

Epoch: 2, Loss: 2.5876259803771973


Processing epoch 01:  38%|███▊      | 1833/4850 [08:44<14:12,  3.54it/s]

Epoch: 2, Loss: 2.1316378116607666


Processing epoch 01:  38%|███▊      | 1834/4850 [08:45<14:13,  3.53it/s]

Epoch: 2, Loss: 2.567735195159912


Processing epoch 01:  38%|███▊      | 1835/4850 [08:45<14:17,  3.52it/s]

Epoch: 2, Loss: 2.735567569732666


Processing epoch 01:  38%|███▊      | 1836/4850 [08:45<14:12,  3.54it/s]

Epoch: 2, Loss: 2.7316372394561768


Processing epoch 01:  38%|███▊      | 1837/4850 [08:46<14:24,  3.48it/s]

Epoch: 2, Loss: 2.720463752746582


Processing epoch 01:  38%|███▊      | 1838/4850 [08:46<14:24,  3.48it/s]

Epoch: 2, Loss: 2.7544822692871094


Processing epoch 01:  38%|███▊      | 1839/4850 [08:46<14:23,  3.49it/s]

Epoch: 2, Loss: 2.551089286804199


Processing epoch 01:  38%|███▊      | 1840/4850 [08:46<14:18,  3.50it/s]

Epoch: 2, Loss: 2.737433910369873


Processing epoch 01:  38%|███▊      | 1841/4850 [08:47<14:21,  3.49it/s]

Epoch: 2, Loss: 2.0143980979919434


Processing epoch 01:  38%|███▊      | 1842/4850 [08:47<14:40,  3.42it/s]

Epoch: 2, Loss: 2.5835928916931152


Processing epoch 01:  38%|███▊      | 1843/4850 [08:47<14:48,  3.39it/s]

Epoch: 2, Loss: 2.8743436336517334


Processing epoch 01:  38%|███▊      | 1844/4850 [08:48<14:55,  3.36it/s]

Epoch: 2, Loss: 2.9960227012634277


Processing epoch 01:  38%|███▊      | 1845/4850 [08:48<14:54,  3.36it/s]

Epoch: 2, Loss: 3.2052626609802246


Processing epoch 01:  38%|███▊      | 1846/4850 [08:48<15:05,  3.32it/s]

Epoch: 2, Loss: 2.393948554992676


Processing epoch 01:  38%|███▊      | 1847/4850 [08:49<14:58,  3.34it/s]

Epoch: 2, Loss: 2.696259021759033


Processing epoch 01:  38%|███▊      | 1848/4850 [08:49<14:50,  3.37it/s]

Epoch: 2, Loss: 2.3912911415100098


Processing epoch 01:  38%|███▊      | 1849/4850 [08:49<14:57,  3.34it/s]

Epoch: 2, Loss: 2.933825969696045


Processing epoch 01:  38%|███▊      | 1850/4850 [08:49<14:58,  3.34it/s]

Epoch: 2, Loss: 2.3011889457702637


Processing epoch 01:  38%|███▊      | 1851/4850 [08:50<15:00,  3.33it/s]

Epoch: 2, Loss: 2.5960307121276855


Processing epoch 01:  38%|███▊      | 1852/4850 [08:50<15:04,  3.32it/s]

Epoch: 2, Loss: 3.0670390129089355


Processing epoch 01:  38%|███▊      | 1853/4850 [08:50<15:04,  3.31it/s]

Epoch: 2, Loss: 2.554560422897339


Processing epoch 01:  38%|███▊      | 1854/4850 [08:51<15:00,  3.33it/s]

Epoch: 2, Loss: 2.148540735244751


Processing epoch 01:  38%|███▊      | 1855/4850 [08:51<15:13,  3.28it/s]

Epoch: 2, Loss: 2.5815136432647705


Processing epoch 01:  38%|███▊      | 1856/4850 [08:51<14:56,  3.34it/s]

Epoch: 2, Loss: 2.668097496032715


Processing epoch 01:  38%|███▊      | 1857/4850 [08:52<14:37,  3.41it/s]

Epoch: 2, Loss: 2.7983317375183105


Processing epoch 01:  38%|███▊      | 1858/4850 [08:52<14:27,  3.45it/s]

Epoch: 2, Loss: 2.4132614135742188


Processing epoch 01:  38%|███▊      | 1859/4850 [08:52<14:21,  3.47it/s]

Epoch: 2, Loss: 2.66133189201355


Processing epoch 01:  38%|███▊      | 1860/4850 [08:52<14:14,  3.50it/s]

Epoch: 2, Loss: 2.4705681800842285


Processing epoch 01:  38%|███▊      | 1861/4850 [08:53<14:11,  3.51it/s]

Epoch: 2, Loss: 2.6695716381073


Processing epoch 01:  38%|███▊      | 1862/4850 [08:53<14:12,  3.50it/s]

Epoch: 2, Loss: 2.4399924278259277


Processing epoch 01:  38%|███▊      | 1863/4850 [08:53<14:11,  3.51it/s]

Epoch: 2, Loss: 2.7667062282562256


Processing epoch 01:  38%|███▊      | 1864/4850 [08:53<14:11,  3.51it/s]

Epoch: 2, Loss: 2.638068199157715


Processing epoch 01:  38%|███▊      | 1865/4850 [08:54<14:06,  3.53it/s]

Epoch: 2, Loss: 3.200287342071533


Processing epoch 01:  38%|███▊      | 1866/4850 [08:54<14:05,  3.53it/s]

Epoch: 2, Loss: 2.490344524383545


Processing epoch 01:  38%|███▊      | 1867/4850 [08:54<14:01,  3.55it/s]

Epoch: 2, Loss: 2.662986993789673


Processing epoch 01:  39%|███▊      | 1868/4850 [08:55<14:00,  3.55it/s]

Epoch: 2, Loss: 2.17683744430542


Processing epoch 01:  39%|███▊      | 1869/4850 [08:55<14:02,  3.54it/s]

Epoch: 2, Loss: 2.804286003112793


Processing epoch 01:  39%|███▊      | 1870/4850 [08:55<14:07,  3.52it/s]

Epoch: 2, Loss: 2.6987085342407227


Processing epoch 01:  39%|███▊      | 1871/4850 [08:55<14:03,  3.53it/s]

Epoch: 2, Loss: 2.7224316596984863


Processing epoch 01:  39%|███▊      | 1872/4850 [08:56<14:03,  3.53it/s]

Epoch: 2, Loss: 2.590620517730713


Processing epoch 01:  39%|███▊      | 1873/4850 [08:56<14:02,  3.54it/s]

Epoch: 2, Loss: 2.6755800247192383


Processing epoch 01:  39%|███▊      | 1874/4850 [08:56<14:00,  3.54it/s]

Epoch: 2, Loss: 2.580939292907715


Processing epoch 01:  39%|███▊      | 1875/4850 [08:57<14:02,  3.53it/s]

Epoch: 2, Loss: 2.621356248855591


Processing epoch 01:  39%|███▊      | 1876/4850 [08:57<14:02,  3.53it/s]

Epoch: 2, Loss: 2.73291015625


Processing epoch 01:  39%|███▊      | 1877/4850 [08:57<14:11,  3.49it/s]

Epoch: 2, Loss: 2.704829216003418


Processing epoch 01:  39%|███▊      | 1878/4850 [08:57<14:04,  3.52it/s]

Epoch: 2, Loss: 2.6744496822357178


Processing epoch 01:  39%|███▊      | 1879/4850 [08:58<14:04,  3.52it/s]

Epoch: 2, Loss: 3.0951082706451416


Processing epoch 01:  39%|███▉      | 1880/4850 [08:58<14:14,  3.47it/s]

Epoch: 2, Loss: 2.8747658729553223


Processing epoch 01:  39%|███▉      | 1881/4850 [08:58<14:07,  3.50it/s]

Epoch: 2, Loss: 3.8291707038879395


Processing epoch 01:  39%|███▉      | 1882/4850 [08:59<14:05,  3.51it/s]

Epoch: 2, Loss: 2.183802366256714


Processing epoch 01:  39%|███▉      | 1883/4850 [08:59<14:03,  3.52it/s]

Epoch: 2, Loss: 3.6364598274230957


Processing epoch 01:  39%|███▉      | 1884/4850 [08:59<14:04,  3.51it/s]

Epoch: 2, Loss: 2.563005208969116


Processing epoch 01:  39%|███▉      | 1885/4850 [08:59<14:03,  3.51it/s]

Epoch: 2, Loss: 2.5776772499084473


Processing epoch 01:  39%|███▉      | 1886/4850 [09:00<14:02,  3.52it/s]

Epoch: 2, Loss: 2.9305741786956787


Processing epoch 01:  39%|███▉      | 1887/4850 [09:00<14:03,  3.51it/s]

Epoch: 2, Loss: 2.4085750579833984


Processing epoch 01:  39%|███▉      | 1888/4850 [09:00<13:59,  3.53it/s]

Epoch: 2, Loss: 3.2930397987365723


Processing epoch 01:  39%|███▉      | 1889/4850 [09:01<14:00,  3.52it/s]

Epoch: 2, Loss: 2.2931206226348877


Processing epoch 01:  39%|███▉      | 1890/4850 [09:01<14:01,  3.52it/s]

Epoch: 2, Loss: 2.385842800140381


Processing epoch 01:  39%|███▉      | 1891/4850 [09:01<14:21,  3.43it/s]

Epoch: 2, Loss: 2.1114935874938965


Processing epoch 01:  39%|███▉      | 1892/4850 [09:01<14:27,  3.41it/s]

Epoch: 2, Loss: 2.4185733795166016


Processing epoch 01:  39%|███▉      | 1893/4850 [09:02<14:28,  3.40it/s]

Epoch: 2, Loss: 2.7374675273895264


Processing epoch 01:  39%|███▉      | 1894/4850 [09:02<14:38,  3.37it/s]

Epoch: 2, Loss: 3.8285231590270996


Processing epoch 01:  39%|███▉      | 1895/4850 [09:02<14:48,  3.32it/s]

Epoch: 2, Loss: 3.1680948734283447


Processing epoch 01:  39%|███▉      | 1896/4850 [09:03<14:53,  3.30it/s]

Epoch: 2, Loss: 2.198127031326294


Processing epoch 01:  39%|███▉      | 1897/4850 [09:03<14:58,  3.29it/s]

Epoch: 2, Loss: 2.575986862182617


Processing epoch 01:  39%|███▉      | 1898/4850 [09:03<14:50,  3.31it/s]

Epoch: 2, Loss: 2.351781129837036


Processing epoch 01:  39%|███▉      | 1899/4850 [09:04<14:50,  3.31it/s]

Epoch: 2, Loss: 2.7144503593444824


Processing epoch 01:  39%|███▉      | 1900/4850 [09:04<14:42,  3.34it/s]

Epoch: 2, Loss: 3.7422962188720703


Processing epoch 01:  39%|███▉      | 1901/4850 [09:04<14:44,  3.33it/s]

Epoch: 2, Loss: 2.532410144805908


Processing epoch 01:  39%|███▉      | 1902/4850 [09:05<14:52,  3.30it/s]

Epoch: 2, Loss: 2.625839948654175


Processing epoch 01:  39%|███▉      | 1903/4850 [09:05<14:37,  3.36it/s]

Epoch: 2, Loss: 2.450369358062744


Processing epoch 01:  39%|███▉      | 1904/4850 [09:05<14:44,  3.33it/s]

Epoch: 2, Loss: 2.614896774291992


Processing epoch 01:  39%|███▉      | 1905/4850 [09:05<14:39,  3.35it/s]

Epoch: 2, Loss: 2.290400505065918


Processing epoch 01:  39%|███▉      | 1906/4850 [09:06<14:25,  3.40it/s]

Epoch: 2, Loss: 2.4298925399780273


Processing epoch 01:  39%|███▉      | 1907/4850 [09:06<14:17,  3.43it/s]

Epoch: 2, Loss: 2.423896312713623


Processing epoch 01:  39%|███▉      | 1908/4850 [09:06<14:08,  3.47it/s]

Epoch: 2, Loss: 2.420971393585205


Processing epoch 01:  39%|███▉      | 1909/4850 [09:07<14:01,  3.49it/s]

Epoch: 2, Loss: 3.1661465167999268


Processing epoch 01:  39%|███▉      | 1910/4850 [09:07<13:57,  3.51it/s]

Epoch: 2, Loss: 2.704202890396118


Processing epoch 01:  39%|███▉      | 1911/4850 [09:07<13:52,  3.53it/s]

Epoch: 2, Loss: 2.791354179382324


Processing epoch 01:  39%|███▉      | 1912/4850 [09:07<14:02,  3.49it/s]

Epoch: 2, Loss: 2.5994949340820312


Processing epoch 01:  39%|███▉      | 1913/4850 [09:08<13:57,  3.51it/s]

Epoch: 2, Loss: 2.726165771484375


Processing epoch 01:  39%|███▉      | 1914/4850 [09:08<14:05,  3.47it/s]

Epoch: 2, Loss: 2.133316993713379


Processing epoch 01:  39%|███▉      | 1915/4850 [09:08<13:59,  3.50it/s]

Epoch: 2, Loss: 2.4948716163635254


Processing epoch 01:  40%|███▉      | 1916/4850 [09:09<13:57,  3.50it/s]

Epoch: 2, Loss: 2.4308276176452637


Processing epoch 01:  40%|███▉      | 1917/4850 [09:09<14:01,  3.49it/s]

Epoch: 2, Loss: 2.771149158477783


Processing epoch 01:  40%|███▉      | 1918/4850 [09:09<13:52,  3.52it/s]

Epoch: 2, Loss: 3.57226824760437


Processing epoch 01:  40%|███▉      | 1919/4850 [09:09<13:52,  3.52it/s]

Epoch: 2, Loss: 2.2683324813842773


Processing epoch 01:  40%|███▉      | 1920/4850 [09:10<13:51,  3.52it/s]

Epoch: 2, Loss: 2.1039109230041504


Processing epoch 01:  40%|███▉      | 1921/4850 [09:10<13:52,  3.52it/s]

Epoch: 2, Loss: 2.362283229827881


Processing epoch 01:  40%|███▉      | 1922/4850 [09:10<13:56,  3.50it/s]

Epoch: 2, Loss: 2.6128664016723633


Processing epoch 01:  40%|███▉      | 1923/4850 [09:11<14:00,  3.48it/s]

Epoch: 2, Loss: 3.5607705116271973


Processing epoch 01:  40%|███▉      | 1924/4850 [09:11<14:00,  3.48it/s]

Epoch: 2, Loss: 2.15277361869812


Processing epoch 01:  40%|███▉      | 1925/4850 [09:11<13:59,  3.49it/s]

Epoch: 2, Loss: 2.5663647651672363


Processing epoch 01:  40%|███▉      | 1926/4850 [09:11<13:57,  3.49it/s]

Epoch: 2, Loss: 2.0881237983703613


Processing epoch 01:  40%|███▉      | 1927/4850 [09:12<13:53,  3.51it/s]

Epoch: 2, Loss: 2.134403944015503


Processing epoch 01:  40%|███▉      | 1928/4850 [09:12<13:50,  3.52it/s]

Epoch: 2, Loss: 3.0722694396972656


Processing epoch 01:  40%|███▉      | 1929/4850 [09:12<13:51,  3.51it/s]

Epoch: 2, Loss: 2.5849721431732178


Processing epoch 01:  40%|███▉      | 1930/4850 [09:13<13:57,  3.49it/s]

Epoch: 2, Loss: 2.567650556564331


Processing epoch 01:  40%|███▉      | 1931/4850 [09:13<13:50,  3.51it/s]

Epoch: 2, Loss: 2.559662342071533


Processing epoch 01:  40%|███▉      | 1932/4850 [09:13<13:49,  3.52it/s]

Epoch: 2, Loss: 2.6990628242492676


Processing epoch 01:  40%|███▉      | 1933/4850 [09:13<13:51,  3.51it/s]

Epoch: 2, Loss: 2.078951597213745


Processing epoch 01:  40%|███▉      | 1934/4850 [09:14<13:46,  3.53it/s]

Epoch: 2, Loss: 2.288748025894165


Processing epoch 01:  40%|███▉      | 1935/4850 [09:14<13:44,  3.54it/s]

Epoch: 2, Loss: 2.740609645843506


Processing epoch 01:  40%|███▉      | 1936/4850 [09:14<13:46,  3.52it/s]

Epoch: 2, Loss: 2.357515811920166


Processing epoch 01:  40%|███▉      | 1937/4850 [09:15<13:46,  3.53it/s]

Epoch: 2, Loss: 2.5514869689941406


Processing epoch 01:  40%|███▉      | 1938/4850 [09:15<13:46,  3.53it/s]

Epoch: 2, Loss: 2.292685031890869


Processing epoch 01:  40%|███▉      | 1939/4850 [09:15<13:47,  3.52it/s]

Epoch: 2, Loss: 2.8087058067321777


Processing epoch 01:  40%|████      | 1940/4850 [09:15<13:44,  3.53it/s]

Epoch: 2, Loss: 2.3218326568603516


Processing epoch 01:  40%|████      | 1941/4850 [09:16<14:00,  3.46it/s]

Epoch: 2, Loss: 2.5969290733337402


Processing epoch 01:  40%|████      | 1942/4850 [09:16<14:08,  3.43it/s]

Epoch: 2, Loss: 3.2317886352539062


Processing epoch 01:  40%|████      | 1943/4850 [09:16<13:58,  3.47it/s]

Epoch: 2, Loss: 3.3048393726348877


Processing epoch 01:  40%|████      | 1944/4850 [09:17<13:53,  3.49it/s]

Epoch: 2, Loss: 2.9826111793518066


Processing epoch 01:  40%|████      | 1945/4850 [09:17<13:49,  3.50it/s]

Epoch: 2, Loss: 2.609999418258667


Processing epoch 01:  40%|████      | 1946/4850 [09:17<13:49,  3.50it/s]

Epoch: 2, Loss: 2.741790294647217


Processing epoch 01:  40%|████      | 1947/4850 [09:17<13:56,  3.47it/s]

Epoch: 2, Loss: 2.5262198448181152


Processing epoch 01:  40%|████      | 1948/4850 [09:18<13:56,  3.47it/s]

Epoch: 2, Loss: 2.363386631011963


Processing epoch 01:  40%|████      | 1949/4850 [09:18<14:01,  3.45it/s]

Epoch: 2, Loss: 2.5273072719573975


Processing epoch 01:  40%|████      | 1950/4850 [09:18<14:06,  3.42it/s]

Epoch: 2, Loss: 3.6712324619293213


Processing epoch 01:  40%|████      | 1951/4850 [09:19<14:13,  3.40it/s]

Epoch: 2, Loss: 2.8273544311523438


Processing epoch 01:  40%|████      | 1952/4850 [09:19<14:11,  3.40it/s]

Epoch: 2, Loss: 3.326909303665161


Processing epoch 01:  40%|████      | 1953/4850 [09:19<14:01,  3.44it/s]

Epoch: 2, Loss: 2.7819149494171143


Processing epoch 01:  40%|████      | 1954/4850 [09:19<13:52,  3.48it/s]

Epoch: 2, Loss: 2.685342788696289


Processing epoch 01:  40%|████      | 1955/4850 [09:20<14:05,  3.42it/s]

Epoch: 2, Loss: 2.88624906539917


Processing epoch 01:  40%|████      | 1956/4850 [09:20<14:12,  3.39it/s]

Epoch: 2, Loss: 2.753448963165283


Processing epoch 01:  40%|████      | 1957/4850 [09:20<14:02,  3.44it/s]

Epoch: 2, Loss: 2.9162662029266357


Processing epoch 01:  40%|████      | 1958/4850 [09:21<13:57,  3.45it/s]

Epoch: 2, Loss: 2.520702362060547


Processing epoch 01:  40%|████      | 1959/4850 [09:21<13:51,  3.48it/s]

Epoch: 2, Loss: 2.518801689147949


Processing epoch 01:  40%|████      | 1960/4850 [09:21<13:48,  3.49it/s]

Epoch: 2, Loss: 2.5179665088653564


Processing epoch 01:  40%|████      | 1961/4850 [09:21<13:42,  3.51it/s]

Epoch: 2, Loss: 2.511032819747925


Processing epoch 01:  40%|████      | 1962/4850 [09:22<13:42,  3.51it/s]

Epoch: 2, Loss: 2.555561065673828


Processing epoch 01:  40%|████      | 1963/4850 [09:22<13:45,  3.50it/s]

Epoch: 2, Loss: 2.9561309814453125


Processing epoch 01:  40%|████      | 1964/4850 [09:22<13:43,  3.51it/s]

Epoch: 2, Loss: 2.83927583694458


Processing epoch 01:  41%|████      | 1965/4850 [09:23<13:40,  3.52it/s]

Epoch: 2, Loss: 2.584555149078369


Processing epoch 01:  41%|████      | 1966/4850 [09:23<13:40,  3.51it/s]

Epoch: 2, Loss: 3.006403923034668


Processing epoch 01:  41%|████      | 1967/4850 [09:23<13:39,  3.52it/s]

Epoch: 2, Loss: 2.6299448013305664


Processing epoch 01:  41%|████      | 1968/4850 [09:23<13:40,  3.51it/s]

Epoch: 2, Loss: 2.4976415634155273


Processing epoch 01:  41%|████      | 1969/4850 [09:24<13:38,  3.52it/s]

Epoch: 2, Loss: 2.9957704544067383


Processing epoch 01:  41%|████      | 1970/4850 [09:24<13:38,  3.52it/s]

Epoch: 2, Loss: 2.4558327198028564


Processing epoch 01:  41%|████      | 1971/4850 [09:24<13:39,  3.51it/s]

Epoch: 2, Loss: 2.471190929412842


Processing epoch 01:  41%|████      | 1972/4850 [09:25<13:37,  3.52it/s]

Epoch: 2, Loss: 2.4831438064575195


Processing epoch 01:  41%|████      | 1973/4850 [09:25<13:36,  3.52it/s]

Epoch: 2, Loss: 2.6370699405670166


Processing epoch 01:  41%|████      | 1974/4850 [09:25<13:35,  3.53it/s]

Epoch: 2, Loss: 2.6556358337402344


Processing epoch 01:  41%|████      | 1975/4850 [09:25<13:35,  3.53it/s]

Epoch: 2, Loss: 2.4697015285491943


Processing epoch 01:  41%|████      | 1976/4850 [09:26<13:34,  3.53it/s]

Epoch: 2, Loss: 2.392353057861328


Processing epoch 01:  41%|████      | 1977/4850 [09:26<13:34,  3.53it/s]

Epoch: 2, Loss: 2.417247772216797


Processing epoch 01:  41%|████      | 1978/4850 [09:26<13:35,  3.52it/s]

Epoch: 2, Loss: 2.523526668548584


Processing epoch 01:  41%|████      | 1979/4850 [09:27<13:29,  3.54it/s]

Epoch: 2, Loss: 3.266871452331543


Processing epoch 01:  41%|████      | 1980/4850 [09:27<13:35,  3.52it/s]

Epoch: 2, Loss: 2.7258734703063965


Processing epoch 01:  41%|████      | 1981/4850 [09:27<13:48,  3.46it/s]

Epoch: 2, Loss: 2.514913558959961


Processing epoch 01:  41%|████      | 1982/4850 [09:27<13:45,  3.47it/s]

Epoch: 2, Loss: 2.460196018218994


Processing epoch 01:  41%|████      | 1983/4850 [09:28<13:44,  3.48it/s]

Epoch: 2, Loss: 2.4685778617858887


Processing epoch 01:  41%|████      | 1984/4850 [09:28<13:41,  3.49it/s]

Epoch: 2, Loss: 2.7532148361206055


Processing epoch 01:  41%|████      | 1985/4850 [09:28<13:40,  3.49it/s]

Epoch: 2, Loss: 3.2559871673583984


Processing epoch 01:  41%|████      | 1986/4850 [09:29<13:37,  3.50it/s]

Epoch: 2, Loss: 2.395108699798584


Processing epoch 01:  41%|████      | 1987/4850 [09:29<13:40,  3.49it/s]

Epoch: 2, Loss: 2.848238706588745


Processing epoch 01:  41%|████      | 1988/4850 [09:29<13:38,  3.50it/s]

Epoch: 2, Loss: 2.429837226867676


Processing epoch 01:  41%|████      | 1989/4850 [09:29<13:35,  3.51it/s]

Epoch: 2, Loss: 2.4432902336120605


Processing epoch 01:  41%|████      | 1990/4850 [09:30<13:32,  3.52it/s]

Epoch: 2, Loss: 2.507558822631836


Processing epoch 01:  41%|████      | 1991/4850 [09:30<13:27,  3.54it/s]

Epoch: 2, Loss: 2.1584019660949707


Processing epoch 01:  41%|████      | 1992/4850 [09:30<13:46,  3.46it/s]

Epoch: 2, Loss: 3.213772773742676


Processing epoch 01:  41%|████      | 1993/4850 [09:31<13:47,  3.45it/s]

Epoch: 2, Loss: 2.796649932861328


Processing epoch 01:  41%|████      | 1994/4850 [09:31<13:38,  3.49it/s]

Epoch: 2, Loss: 3.1662187576293945


Processing epoch 01:  41%|████      | 1995/4850 [09:31<14:02,  3.39it/s]

Epoch: 2, Loss: 2.2655162811279297


Processing epoch 01:  41%|████      | 1996/4850 [09:31<14:06,  3.37it/s]

Epoch: 2, Loss: 2.350187301635742


Processing epoch 01:  41%|████      | 1997/4850 [09:32<13:57,  3.41it/s]

Epoch: 2, Loss: 2.425294876098633


Processing epoch 01:  41%|████      | 1998/4850 [09:32<13:45,  3.46it/s]

Epoch: 2, Loss: 2.795680046081543


Processing epoch 01:  41%|████      | 1999/4850 [09:32<13:50,  3.43it/s]

Epoch: 2, Loss: 2.4406137466430664


Processing epoch 01:  41%|████      | 2000/4850 [09:33<13:56,  3.41it/s]

Epoch: 2, Loss: 2.5494208335876465


Processing epoch 01:  41%|████▏     | 2001/4850 [09:33<13:57,  3.40it/s]

Epoch: 2, Loss: 2.213467836380005


Processing epoch 01:  41%|████▏     | 2002/4850 [09:33<13:50,  3.43it/s]

Epoch: 2, Loss: 3.211482048034668


Processing epoch 01:  41%|████▏     | 2003/4850 [09:34<14:03,  3.38it/s]

Epoch: 2, Loss: 2.599788188934326


Processing epoch 01:  41%|████▏     | 2004/4850 [09:34<13:56,  3.40it/s]

Epoch: 2, Loss: 2.5445942878723145


Processing epoch 01:  41%|████▏     | 2005/4850 [09:34<13:46,  3.44it/s]

Epoch: 2, Loss: 2.584930896759033


Processing epoch 01:  41%|████▏     | 2006/4850 [09:34<14:14,  3.33it/s]

Epoch: 2, Loss: 2.684335231781006


Processing epoch 01:  41%|████▏     | 2007/4850 [09:35<14:01,  3.38it/s]

Epoch: 2, Loss: 2.767551898956299


Processing epoch 01:  41%|████▏     | 2008/4850 [09:35<13:49,  3.42it/s]

Epoch: 2, Loss: 2.7722771167755127


Processing epoch 01:  41%|████▏     | 2009/4850 [09:35<13:47,  3.43it/s]

Epoch: 2, Loss: 2.715369939804077


Processing epoch 01:  41%|████▏     | 2010/4850 [09:36<13:49,  3.42it/s]

Epoch: 2, Loss: 2.3469433784484863


Processing epoch 01:  41%|████▏     | 2011/4850 [09:36<13:41,  3.46it/s]

Epoch: 2, Loss: 2.1658458709716797


Processing epoch 01:  41%|████▏     | 2012/4850 [09:36<13:35,  3.48it/s]

Epoch: 2, Loss: 2.4338059425354004


Processing epoch 01:  42%|████▏     | 2013/4850 [09:36<13:28,  3.51it/s]

Epoch: 2, Loss: 4.51262092590332


Processing epoch 01:  42%|████▏     | 2014/4850 [09:37<13:30,  3.50it/s]

Epoch: 2, Loss: 3.138521671295166


Processing epoch 01:  42%|████▏     | 2015/4850 [09:37<13:25,  3.52it/s]

Epoch: 2, Loss: 2.151242733001709


Processing epoch 01:  42%|████▏     | 2016/4850 [09:37<13:25,  3.52it/s]

Epoch: 2, Loss: 2.926374673843384


Processing epoch 01:  42%|████▏     | 2017/4850 [09:38<13:25,  3.52it/s]

Epoch: 2, Loss: 2.6612634658813477


Processing epoch 01:  42%|████▏     | 2018/4850 [09:38<13:23,  3.53it/s]

Epoch: 2, Loss: 3.039808750152588


Processing epoch 01:  42%|████▏     | 2019/4850 [09:38<13:23,  3.53it/s]

Epoch: 2, Loss: 2.4384336471557617


Processing epoch 01:  42%|████▏     | 2020/4850 [09:38<13:19,  3.54it/s]

Epoch: 2, Loss: 2.1710569858551025


Processing epoch 01:  42%|████▏     | 2021/4850 [09:39<13:27,  3.50it/s]

Epoch: 2, Loss: 2.918318748474121


Processing epoch 01:  42%|████▏     | 2022/4850 [09:39<13:37,  3.46it/s]

Epoch: 2, Loss: 3.0660290718078613


Processing epoch 01:  42%|████▏     | 2023/4850 [09:39<13:33,  3.47it/s]

Epoch: 2, Loss: 2.5307416915893555


Processing epoch 01:  42%|████▏     | 2024/4850 [09:40<13:30,  3.49it/s]

Epoch: 2, Loss: 2.4143106937408447


Processing epoch 01:  42%|████▏     | 2025/4850 [09:40<13:26,  3.50it/s]

Epoch: 2, Loss: 2.5424695014953613


Processing epoch 01:  42%|████▏     | 2026/4850 [09:40<13:19,  3.53it/s]

Epoch: 2, Loss: 2.805752754211426


Processing epoch 01:  42%|████▏     | 2027/4850 [09:40<13:19,  3.53it/s]

Epoch: 2, Loss: 2.432619094848633


Processing epoch 01:  42%|████▏     | 2028/4850 [09:41<13:18,  3.53it/s]

Epoch: 2, Loss: 2.7697277069091797


Processing epoch 01:  42%|████▏     | 2029/4850 [09:41<13:20,  3.52it/s]

Epoch: 2, Loss: 2.248077392578125


Processing epoch 01:  42%|████▏     | 2030/4850 [09:41<13:19,  3.53it/s]

Epoch: 2, Loss: 2.908277988433838


Processing epoch 01:  42%|████▏     | 2031/4850 [09:42<13:23,  3.51it/s]

Epoch: 2, Loss: 2.8407492637634277


Processing epoch 01:  42%|████▏     | 2032/4850 [09:42<13:29,  3.48it/s]

Epoch: 2, Loss: 2.2968931198120117


Processing epoch 01:  42%|████▏     | 2033/4850 [09:42<13:26,  3.49it/s]

Epoch: 2, Loss: 2.857567310333252


Processing epoch 01:  42%|████▏     | 2034/4850 [09:42<13:24,  3.50it/s]

Epoch: 2, Loss: 2.5260119438171387


Processing epoch 01:  42%|████▏     | 2035/4850 [09:43<13:20,  3.51it/s]

Epoch: 2, Loss: 2.6626410484313965


Processing epoch 01:  42%|████▏     | 2036/4850 [09:43<13:20,  3.51it/s]

Epoch: 2, Loss: 2.9013824462890625


Processing epoch 01:  42%|████▏     | 2037/4850 [09:43<13:16,  3.53it/s]

Epoch: 2, Loss: 2.7376811504364014


Processing epoch 01:  42%|████▏     | 2038/4850 [09:44<13:16,  3.53it/s]

Epoch: 2, Loss: 2.5212273597717285


Processing epoch 01:  42%|████▏     | 2039/4850 [09:44<13:21,  3.51it/s]

Epoch: 2, Loss: 2.8226823806762695


Processing epoch 01:  42%|████▏     | 2040/4850 [09:44<13:19,  3.51it/s]

Epoch: 2, Loss: 2.9152002334594727


Processing epoch 01:  42%|████▏     | 2041/4850 [09:44<13:22,  3.50it/s]

Epoch: 2, Loss: 2.2126235961914062


Processing epoch 01:  42%|████▏     | 2042/4850 [09:45<13:23,  3.50it/s]

Epoch: 2, Loss: 2.9808762073516846


Processing epoch 01:  42%|████▏     | 2043/4850 [09:45<13:23,  3.49it/s]

Epoch: 2, Loss: 2.8665387630462646


Processing epoch 01:  42%|████▏     | 2044/4850 [09:45<13:40,  3.42it/s]

Epoch: 2, Loss: 2.8709707260131836


Processing epoch 01:  42%|████▏     | 2045/4850 [09:46<13:33,  3.45it/s]

Epoch: 2, Loss: 2.373444080352783


Processing epoch 01:  42%|████▏     | 2046/4850 [09:46<13:25,  3.48it/s]

Epoch: 2, Loss: 3.5691256523132324


Processing epoch 01:  42%|████▏     | 2047/4850 [09:46<13:32,  3.45it/s]

Epoch: 2, Loss: 2.700772285461426


Processing epoch 01:  42%|████▏     | 2048/4850 [09:46<13:40,  3.42it/s]

Epoch: 2, Loss: 2.7186925411224365


Processing epoch 01:  42%|████▏     | 2049/4850 [09:47<13:52,  3.36it/s]

Epoch: 2, Loss: 2.676985263824463


Processing epoch 01:  42%|████▏     | 2050/4850 [09:47<13:52,  3.36it/s]

Epoch: 2, Loss: 2.2863001823425293


Processing epoch 01:  42%|████▏     | 2051/4850 [09:47<13:49,  3.37it/s]

Epoch: 2, Loss: 2.8415331840515137


Processing epoch 01:  42%|████▏     | 2052/4850 [09:48<13:55,  3.35it/s]

Epoch: 2, Loss: 2.843090295791626


Processing epoch 01:  42%|████▏     | 2053/4850 [09:48<13:55,  3.35it/s]

Epoch: 2, Loss: 2.3549435138702393


Processing epoch 01:  42%|████▏     | 2054/4850 [09:48<13:56,  3.34it/s]

Epoch: 2, Loss: 3.0534186363220215


Processing epoch 01:  42%|████▏     | 2055/4850 [09:49<13:51,  3.36it/s]

Epoch: 2, Loss: 2.0327517986297607


Processing epoch 01:  42%|████▏     | 2056/4850 [09:49<13:56,  3.34it/s]

Epoch: 2, Loss: 2.5922164916992188


Processing epoch 01:  42%|████▏     | 2057/4850 [09:49<13:48,  3.37it/s]

Epoch: 2, Loss: 1.755220890045166


Processing epoch 01:  42%|████▏     | 2058/4850 [09:49<13:33,  3.43it/s]

Epoch: 2, Loss: 3.927727699279785


Processing epoch 01:  42%|████▏     | 2059/4850 [09:50<13:25,  3.47it/s]

Epoch: 2, Loss: 2.716129779815674


Processing epoch 01:  42%|████▏     | 2060/4850 [09:50<13:19,  3.49it/s]

Epoch: 2, Loss: 2.7598531246185303


Processing epoch 01:  42%|████▏     | 2061/4850 [09:50<13:13,  3.51it/s]

Epoch: 2, Loss: 2.254399299621582


Processing epoch 01:  43%|████▎     | 2062/4850 [09:51<13:15,  3.51it/s]

Epoch: 2, Loss: 2.161306858062744


Processing epoch 01:  43%|████▎     | 2063/4850 [09:51<13:14,  3.51it/s]

Epoch: 2, Loss: 2.906285285949707


Processing epoch 01:  43%|████▎     | 2064/4850 [09:51<13:13,  3.51it/s]

Epoch: 2, Loss: 2.713395118713379


Processing epoch 01:  43%|████▎     | 2065/4850 [09:51<13:10,  3.52it/s]

Epoch: 2, Loss: 2.463620185852051


Processing epoch 01:  43%|████▎     | 2066/4850 [09:52<13:05,  3.54it/s]

Epoch: 2, Loss: 2.8727307319641113


Processing epoch 01:  43%|████▎     | 2067/4850 [09:52<13:04,  3.55it/s]

Epoch: 2, Loss: 2.815739154815674


Processing epoch 01:  43%|████▎     | 2068/4850 [09:52<13:07,  3.53it/s]

Epoch: 2, Loss: 2.235466957092285


Processing epoch 01:  43%|████▎     | 2069/4850 [09:53<13:05,  3.54it/s]

Epoch: 2, Loss: 2.5354599952697754


Processing epoch 01:  43%|████▎     | 2070/4850 [09:53<13:05,  3.54it/s]

Epoch: 2, Loss: 2.5506935119628906


Processing epoch 01:  43%|████▎     | 2071/4850 [09:53<13:04,  3.54it/s]

Epoch: 2, Loss: 2.6404218673706055


Processing epoch 01:  43%|████▎     | 2072/4850 [09:53<13:03,  3.54it/s]

Epoch: 2, Loss: 2.5746312141418457


Processing epoch 01:  43%|████▎     | 2073/4850 [09:54<13:01,  3.55it/s]

Epoch: 2, Loss: 2.758294105529785


Processing epoch 01:  43%|████▎     | 2074/4850 [09:54<13:06,  3.53it/s]

Epoch: 2, Loss: 2.788585662841797


Processing epoch 01:  43%|████▎     | 2075/4850 [09:54<13:07,  3.53it/s]

Epoch: 2, Loss: 2.348905563354492


Processing epoch 01:  43%|████▎     | 2076/4850 [09:54<13:04,  3.54it/s]

Epoch: 2, Loss: 2.4660942554473877


Processing epoch 01:  43%|████▎     | 2077/4850 [09:55<13:05,  3.53it/s]

Epoch: 2, Loss: 2.576780080795288


Processing epoch 01:  43%|████▎     | 2078/4850 [09:55<13:07,  3.52it/s]

Epoch: 2, Loss: 2.322843551635742


Processing epoch 01:  43%|████▎     | 2079/4850 [09:55<13:05,  3.53it/s]

Epoch: 2, Loss: 3.1239333152770996


Processing epoch 01:  43%|████▎     | 2080/4850 [09:56<13:03,  3.54it/s]

Epoch: 2, Loss: 2.7783584594726562


Processing epoch 01:  43%|████▎     | 2081/4850 [09:56<13:05,  3.53it/s]

Epoch: 2, Loss: 2.2735848426818848


Processing epoch 01:  43%|████▎     | 2082/4850 [09:56<13:09,  3.51it/s]

Epoch: 2, Loss: 2.0175395011901855


Processing epoch 01:  43%|████▎     | 2083/4850 [09:56<13:03,  3.53it/s]

Epoch: 2, Loss: 3.107700824737549


Processing epoch 01:  43%|████▎     | 2084/4850 [09:57<13:05,  3.52it/s]

Epoch: 2, Loss: 2.2183985710144043


Processing epoch 01:  43%|████▎     | 2085/4850 [09:57<13:02,  3.53it/s]

Epoch: 2, Loss: 2.28682017326355


Processing epoch 01:  43%|████▎     | 2086/4850 [09:57<13:10,  3.50it/s]

Epoch: 2, Loss: 2.502593517303467


Processing epoch 01:  43%|████▎     | 2087/4850 [09:58<13:07,  3.51it/s]

Epoch: 2, Loss: 3.029141426086426


Processing epoch 01:  43%|████▎     | 2088/4850 [09:58<13:05,  3.52it/s]

Epoch: 2, Loss: 2.2763538360595703


Processing epoch 01:  43%|████▎     | 2089/4850 [09:58<13:05,  3.51it/s]

Epoch: 2, Loss: 2.6985535621643066


Processing epoch 01:  43%|████▎     | 2090/4850 [09:58<13:01,  3.53it/s]

Epoch: 2, Loss: 2.4635443687438965


Processing epoch 01:  43%|████▎     | 2091/4850 [09:59<13:03,  3.52it/s]

Epoch: 2, Loss: 2.9238839149475098


Processing epoch 01:  43%|████▎     | 2092/4850 [09:59<13:14,  3.47it/s]

Epoch: 2, Loss: 2.496767997741699


Processing epoch 01:  43%|████▎     | 2093/4850 [09:59<13:33,  3.39it/s]

Epoch: 2, Loss: 3.1676738262176514


Processing epoch 01:  43%|████▎     | 2094/4850 [10:00<13:19,  3.45it/s]

Epoch: 2, Loss: 3.260931968688965


Processing epoch 01:  43%|████▎     | 2095/4850 [10:00<13:22,  3.43it/s]

Epoch: 2, Loss: 2.78887939453125


Processing epoch 01:  43%|████▎     | 2096/4850 [10:00<13:28,  3.41it/s]

Epoch: 2, Loss: 2.642550468444824


Processing epoch 01:  43%|████▎     | 2097/4850 [10:01<13:23,  3.43it/s]

Epoch: 2, Loss: 3.5623536109924316


Processing epoch 01:  43%|████▎     | 2098/4850 [10:01<13:20,  3.44it/s]

Epoch: 2, Loss: 2.75803279876709


Processing epoch 01:  43%|████▎     | 2099/4850 [10:01<13:12,  3.47it/s]

Epoch: 2, Loss: 2.595449447631836


Processing epoch 01:  43%|████▎     | 2100/4850 [10:01<13:34,  3.38it/s]

Epoch: 2, Loss: 2.6705148220062256


Processing epoch 01:  43%|████▎     | 2101/4850 [10:02<13:35,  3.37it/s]

Epoch: 2, Loss: 2.5475640296936035


Processing epoch 01:  43%|████▎     | 2102/4850 [10:02<13:31,  3.39it/s]

Epoch: 2, Loss: 2.673041820526123


Processing epoch 01:  43%|████▎     | 2103/4850 [10:02<13:21,  3.43it/s]

Epoch: 2, Loss: 2.399789810180664


Processing epoch 01:  43%|████▎     | 2104/4850 [10:03<13:30,  3.39it/s]

Epoch: 2, Loss: 2.6209452152252197


Processing epoch 01:  43%|████▎     | 2105/4850 [10:03<13:47,  3.32it/s]

Epoch: 2, Loss: 2.5739188194274902


Processing epoch 01:  43%|████▎     | 2106/4850 [10:03<13:48,  3.31it/s]

Epoch: 2, Loss: 2.825979709625244


Processing epoch 01:  43%|████▎     | 2107/4850 [10:03<13:33,  3.37it/s]

Epoch: 2, Loss: 2.3792295455932617


Processing epoch 01:  43%|████▎     | 2108/4850 [10:04<13:24,  3.41it/s]

Epoch: 2, Loss: 2.8417751789093018


Processing epoch 01:  43%|████▎     | 2109/4850 [10:04<13:16,  3.44it/s]

Epoch: 2, Loss: 2.9641900062561035


Processing epoch 01:  44%|████▎     | 2110/4850 [10:04<13:07,  3.48it/s]

Epoch: 2, Loss: 2.606691360473633


Processing epoch 01:  44%|████▎     | 2111/4850 [10:05<13:11,  3.46it/s]

Epoch: 2, Loss: 2.153597831726074


Processing epoch 01:  44%|████▎     | 2112/4850 [10:05<13:10,  3.46it/s]

Epoch: 2, Loss: 2.5523645877838135


Processing epoch 01:  44%|████▎     | 2113/4850 [10:05<13:15,  3.44it/s]

Epoch: 2, Loss: 2.6773195266723633


Processing epoch 01:  44%|████▎     | 2114/4850 [10:05<13:12,  3.45it/s]

Epoch: 2, Loss: 1.9436389207839966


Processing epoch 01:  44%|████▎     | 2115/4850 [10:06<13:02,  3.50it/s]

Epoch: 2, Loss: 2.8839101791381836


Processing epoch 01:  44%|████▎     | 2116/4850 [10:06<12:56,  3.52it/s]

Epoch: 2, Loss: 2.0092933177948


Processing epoch 01:  44%|████▎     | 2117/4850 [10:06<12:53,  3.53it/s]

Epoch: 2, Loss: 2.878682851791382


Processing epoch 01:  44%|████▎     | 2118/4850 [10:07<12:53,  3.53it/s]

Epoch: 2, Loss: 2.7309677600860596


Processing epoch 01:  44%|████▎     | 2119/4850 [10:07<12:49,  3.55it/s]

Epoch: 2, Loss: 2.8150081634521484


Processing epoch 01:  44%|████▎     | 2120/4850 [10:07<12:53,  3.53it/s]

Epoch: 2, Loss: 2.401594877243042


Processing epoch 01:  44%|████▎     | 2121/4850 [10:07<12:53,  3.53it/s]

Epoch: 2, Loss: 2.368381977081299


Processing epoch 01:  44%|████▍     | 2122/4850 [10:08<12:59,  3.50it/s]

Epoch: 2, Loss: 3.7928049564361572


Processing epoch 01:  44%|████▍     | 2123/4850 [10:08<12:52,  3.53it/s]

Epoch: 2, Loss: 4.574451446533203


Processing epoch 01:  44%|████▍     | 2124/4850 [10:08<12:52,  3.53it/s]

Epoch: 2, Loss: 2.5461840629577637


Processing epoch 01:  44%|████▍     | 2125/4850 [10:09<12:52,  3.53it/s]

Epoch: 2, Loss: 2.3216567039489746


Processing epoch 01:  44%|████▍     | 2126/4850 [10:09<12:54,  3.52it/s]

Epoch: 2, Loss: 2.767120838165283


Processing epoch 01:  44%|████▍     | 2127/4850 [10:09<12:53,  3.52it/s]

Epoch: 2, Loss: 2.7481472492218018


Processing epoch 01:  44%|████▍     | 2128/4850 [10:09<12:53,  3.52it/s]

Epoch: 2, Loss: 2.184352397918701


Processing epoch 01:  44%|████▍     | 2129/4850 [10:10<12:55,  3.51it/s]

Epoch: 2, Loss: 1.9651854038238525


Processing epoch 01:  44%|████▍     | 2130/4850 [10:10<12:55,  3.51it/s]

Epoch: 2, Loss: 2.9723639488220215


Processing epoch 01:  44%|████▍     | 2131/4850 [10:10<12:57,  3.50it/s]

Epoch: 2, Loss: 2.2627930641174316


Processing epoch 01:  44%|████▍     | 2132/4850 [10:11<12:54,  3.51it/s]

Epoch: 2, Loss: 2.6134133338928223


Processing epoch 01:  44%|████▍     | 2133/4850 [10:11<13:01,  3.48it/s]

Epoch: 2, Loss: 2.4144649505615234


Processing epoch 01:  44%|████▍     | 2134/4850 [10:11<12:57,  3.49it/s]

Epoch: 2, Loss: 2.438183307647705


Processing epoch 01:  44%|████▍     | 2135/4850 [10:11<12:55,  3.50it/s]

Epoch: 2, Loss: 2.5346121788024902


Processing epoch 01:  44%|████▍     | 2136/4850 [10:12<12:54,  3.50it/s]

Epoch: 2, Loss: 2.035393476486206


Processing epoch 01:  44%|████▍     | 2137/4850 [10:12<12:52,  3.51it/s]

Epoch: 2, Loss: 2.4210920333862305


Processing epoch 01:  44%|████▍     | 2138/4850 [10:12<12:50,  3.52it/s]

Epoch: 2, Loss: 2.6645216941833496


Processing epoch 01:  44%|████▍     | 2139/4850 [10:13<12:48,  3.53it/s]

Epoch: 2, Loss: 3.1572155952453613


Processing epoch 01:  44%|████▍     | 2140/4850 [10:13<12:49,  3.52it/s]

Epoch: 2, Loss: 2.485469341278076


Processing epoch 01:  44%|████▍     | 2141/4850 [10:13<12:46,  3.53it/s]

Epoch: 2, Loss: 2.847482681274414


Processing epoch 01:  44%|████▍     | 2142/4850 [10:13<12:55,  3.49it/s]

Epoch: 2, Loss: 2.611117362976074


Processing epoch 01:  44%|████▍     | 2143/4850 [10:14<12:56,  3.48it/s]

Epoch: 2, Loss: 2.6980438232421875


Processing epoch 01:  44%|████▍     | 2144/4850 [10:14<13:08,  3.43it/s]

Epoch: 2, Loss: 2.807438611984253


Processing epoch 01:  44%|████▍     | 2145/4850 [10:14<13:32,  3.33it/s]

Epoch: 2, Loss: 2.423673629760742


Processing epoch 01:  44%|████▍     | 2146/4850 [10:15<13:15,  3.40it/s]

Epoch: 2, Loss: 2.926880359649658


Processing epoch 01:  44%|████▍     | 2147/4850 [10:15<13:19,  3.38it/s]

Epoch: 2, Loss: 2.1701955795288086


Processing epoch 01:  44%|████▍     | 2148/4850 [10:15<13:08,  3.43it/s]

Epoch: 2, Loss: 3.020049810409546


Processing epoch 01:  44%|████▍     | 2149/4850 [10:16<13:02,  3.45it/s]

Epoch: 2, Loss: 2.8539352416992188


Processing epoch 01:  44%|████▍     | 2150/4850 [10:16<13:07,  3.43it/s]

Epoch: 2, Loss: 2.570923328399658


Processing epoch 01:  44%|████▍     | 2151/4850 [10:16<13:08,  3.42it/s]

Epoch: 2, Loss: 2.5475988388061523


Processing epoch 01:  44%|████▍     | 2152/4850 [10:16<13:07,  3.42it/s]

Epoch: 2, Loss: 2.6001572608947754


Processing epoch 01:  44%|████▍     | 2153/4850 [10:17<13:10,  3.41it/s]

Epoch: 2, Loss: 3.103895902633667


Processing epoch 01:  44%|████▍     | 2154/4850 [10:17<13:20,  3.37it/s]

Epoch: 2, Loss: 2.310349941253662


Processing epoch 01:  44%|████▍     | 2155/4850 [10:17<13:20,  3.37it/s]

Epoch: 2, Loss: 2.4484431743621826


Processing epoch 01:  44%|████▍     | 2156/4850 [10:18<13:24,  3.35it/s]

Epoch: 2, Loss: 2.525569438934326


Processing epoch 01:  44%|████▍     | 2157/4850 [10:18<13:13,  3.39it/s]

Epoch: 2, Loss: 2.405047655105591


Processing epoch 01:  44%|████▍     | 2158/4850 [10:18<13:03,  3.43it/s]

Epoch: 2, Loss: 2.4216666221618652


Processing epoch 01:  45%|████▍     | 2159/4850 [10:18<12:56,  3.47it/s]

Epoch: 2, Loss: 3.0453438758850098


Processing epoch 01:  45%|████▍     | 2160/4850 [10:19<12:51,  3.49it/s]

Epoch: 2, Loss: 3.1579155921936035


Processing epoch 01:  45%|████▍     | 2161/4850 [10:19<12:50,  3.49it/s]

Epoch: 2, Loss: 2.1202213764190674


Processing epoch 01:  45%|████▍     | 2162/4850 [10:19<12:46,  3.51it/s]

Epoch: 2, Loss: 3.3296048641204834


Processing epoch 01:  45%|████▍     | 2163/4850 [10:20<12:44,  3.51it/s]

Epoch: 2, Loss: 2.1753222942352295


Processing epoch 01:  45%|████▍     | 2164/4850 [10:20<12:41,  3.53it/s]

Epoch: 2, Loss: 2.8273661136627197


Processing epoch 01:  45%|████▍     | 2165/4850 [10:20<12:38,  3.54it/s]

Epoch: 2, Loss: 2.914196014404297


Processing epoch 01:  45%|████▍     | 2166/4850 [10:20<12:39,  3.53it/s]

Epoch: 2, Loss: 3.0852837562561035


Processing epoch 01:  45%|████▍     | 2167/4850 [10:21<12:35,  3.55it/s]

Epoch: 2, Loss: 2.770648717880249


Processing epoch 01:  45%|████▍     | 2168/4850 [10:21<12:38,  3.54it/s]

Epoch: 2, Loss: 2.988189697265625


Processing epoch 01:  45%|████▍     | 2169/4850 [10:21<12:42,  3.51it/s]

Epoch: 2, Loss: 2.6600890159606934


Processing epoch 01:  45%|████▍     | 2170/4850 [10:22<12:40,  3.52it/s]

Epoch: 2, Loss: 1.9437668323516846


Processing epoch 01:  45%|████▍     | 2171/4850 [10:22<12:40,  3.52it/s]

Epoch: 2, Loss: 2.8766822814941406


Processing epoch 01:  45%|████▍     | 2172/4850 [10:22<12:37,  3.53it/s]

Epoch: 2, Loss: 2.9000091552734375


Processing epoch 01:  45%|████▍     | 2173/4850 [10:22<12:41,  3.51it/s]

Epoch: 2, Loss: 2.546599864959717


Processing epoch 01:  45%|████▍     | 2174/4850 [10:23<12:44,  3.50it/s]

Epoch: 2, Loss: 2.704157829284668


Processing epoch 01:  45%|████▍     | 2175/4850 [10:23<12:43,  3.50it/s]

Epoch: 2, Loss: 2.4680237770080566


Processing epoch 01:  45%|████▍     | 2176/4850 [10:23<12:42,  3.50it/s]

Epoch: 2, Loss: 2.65380859375


Processing epoch 01:  45%|████▍     | 2177/4850 [10:24<12:40,  3.51it/s]

Epoch: 2, Loss: 2.618255853652954


Processing epoch 01:  45%|████▍     | 2178/4850 [10:24<12:38,  3.52it/s]

Epoch: 2, Loss: 2.6440958976745605


Processing epoch 01:  45%|████▍     | 2179/4850 [10:24<12:36,  3.53it/s]

Epoch: 2, Loss: 3.1641950607299805


Processing epoch 01:  45%|████▍     | 2180/4850 [10:24<12:38,  3.52it/s]

Epoch: 2, Loss: 3.1945738792419434


Processing epoch 01:  45%|████▍     | 2181/4850 [10:25<12:35,  3.53it/s]

Epoch: 2, Loss: 2.379122257232666


Processing epoch 01:  45%|████▍     | 2182/4850 [10:25<12:34,  3.54it/s]

Epoch: 2, Loss: 2.3825106620788574


Processing epoch 01:  45%|████▌     | 2183/4850 [10:25<12:33,  3.54it/s]

Epoch: 2, Loss: 2.971827268600464


Processing epoch 01:  45%|████▌     | 2184/4850 [10:26<12:35,  3.53it/s]

Epoch: 2, Loss: 2.8625059127807617


Processing epoch 01:  45%|████▌     | 2185/4850 [10:26<12:35,  3.53it/s]

Epoch: 2, Loss: 2.6014034748077393


Processing epoch 01:  45%|████▌     | 2186/4850 [10:26<12:37,  3.52it/s]

Epoch: 2, Loss: 2.4986462593078613


Processing epoch 01:  45%|████▌     | 2187/4850 [10:26<12:35,  3.52it/s]

Epoch: 2, Loss: 3.1990978717803955


Processing epoch 01:  45%|████▌     | 2188/4850 [10:27<12:39,  3.51it/s]

Epoch: 2, Loss: 2.4987082481384277


Processing epoch 01:  45%|████▌     | 2189/4850 [10:27<12:38,  3.51it/s]

Epoch: 2, Loss: 2.7184064388275146


Processing epoch 01:  45%|████▌     | 2190/4850 [10:27<12:37,  3.51it/s]

Epoch: 2, Loss: 3.0513648986816406


Processing epoch 01:  45%|████▌     | 2191/4850 [10:28<12:45,  3.47it/s]

Epoch: 2, Loss: 2.7731311321258545


Processing epoch 01:  45%|████▌     | 2192/4850 [10:28<12:50,  3.45it/s]

Epoch: 2, Loss: 2.019458770751953


Processing epoch 01:  45%|████▌     | 2193/4850 [10:28<12:57,  3.42it/s]

Epoch: 2, Loss: 2.4519007205963135


Processing epoch 01:  45%|████▌     | 2194/4850 [10:28<13:06,  3.38it/s]

Epoch: 2, Loss: 2.910604953765869


Processing epoch 01:  45%|████▌     | 2195/4850 [10:29<12:58,  3.41it/s]

Epoch: 2, Loss: 3.316385269165039


Processing epoch 01:  45%|████▌     | 2196/4850 [10:29<12:58,  3.41it/s]

Epoch: 2, Loss: 2.3947489261627197


Processing epoch 01:  45%|████▌     | 2197/4850 [10:29<12:49,  3.45it/s]

Epoch: 2, Loss: 2.808166027069092


Processing epoch 01:  45%|████▌     | 2198/4850 [10:30<12:56,  3.41it/s]

Epoch: 2, Loss: 3.396486282348633


Processing epoch 01:  45%|████▌     | 2199/4850 [10:30<13:00,  3.40it/s]

Epoch: 2, Loss: 2.4468843936920166


Processing epoch 01:  45%|████▌     | 2200/4850 [10:30<13:09,  3.36it/s]

Epoch: 2, Loss: 2.3303208351135254


Processing epoch 01:  45%|████▌     | 2201/4850 [10:30<13:04,  3.38it/s]

Epoch: 2, Loss: 2.9258551597595215


Processing epoch 01:  45%|████▌     | 2202/4850 [10:31<13:11,  3.34it/s]

Epoch: 2, Loss: 2.3153553009033203


Processing epoch 01:  45%|████▌     | 2203/4850 [10:31<13:00,  3.39it/s]

Epoch: 2, Loss: 2.997706890106201


Processing epoch 01:  45%|████▌     | 2204/4850 [10:31<12:54,  3.42it/s]

Epoch: 2, Loss: 2.4209134578704834


Processing epoch 01:  45%|████▌     | 2205/4850 [10:32<13:01,  3.38it/s]

Epoch: 2, Loss: 1.9211230278015137


Processing epoch 01:  45%|████▌     | 2206/4850 [10:32<13:14,  3.33it/s]

Epoch: 2, Loss: 2.796353816986084


Processing epoch 01:  46%|████▌     | 2207/4850 [10:32<13:03,  3.37it/s]

Epoch: 2, Loss: 2.710963726043701


Processing epoch 01:  46%|████▌     | 2208/4850 [10:33<12:52,  3.42it/s]

Epoch: 2, Loss: 2.5429129600524902


Processing epoch 01:  46%|████▌     | 2209/4850 [10:33<12:42,  3.46it/s]

Epoch: 2, Loss: 2.3179993629455566


Processing epoch 01:  46%|████▌     | 2210/4850 [10:33<12:41,  3.46it/s]

Epoch: 2, Loss: 2.910388946533203


Processing epoch 01:  46%|████▌     | 2211/4850 [10:33<12:35,  3.49it/s]

Epoch: 2, Loss: 2.401815414428711


Processing epoch 01:  46%|████▌     | 2212/4850 [10:34<12:27,  3.53it/s]

Epoch: 2, Loss: 3.2852237224578857


Processing epoch 01:  46%|████▌     | 2213/4850 [10:34<12:27,  3.53it/s]

Epoch: 2, Loss: 2.8698220252990723


Processing epoch 01:  46%|████▌     | 2214/4850 [10:34<12:25,  3.53it/s]

Epoch: 2, Loss: 2.8090991973876953


Processing epoch 01:  46%|████▌     | 2215/4850 [10:35<12:25,  3.53it/s]

Epoch: 2, Loss: 2.2922258377075195


Processing epoch 01:  46%|████▌     | 2216/4850 [10:35<12:31,  3.50it/s]

Epoch: 2, Loss: 2.0493688583374023


Processing epoch 01:  46%|████▌     | 2217/4850 [10:35<12:28,  3.52it/s]

Epoch: 2, Loss: 2.342348575592041


Processing epoch 01:  46%|████▌     | 2218/4850 [10:35<12:26,  3.53it/s]

Epoch: 2, Loss: 2.444929838180542


Processing epoch 01:  46%|████▌     | 2219/4850 [10:36<12:26,  3.52it/s]

Epoch: 2, Loss: 2.4670968055725098


Processing epoch 01:  46%|████▌     | 2220/4850 [10:36<12:26,  3.52it/s]

Epoch: 2, Loss: 2.8013153076171875


Processing epoch 01:  46%|████▌     | 2221/4850 [10:36<12:28,  3.51it/s]

Epoch: 2, Loss: 2.8641481399536133


Processing epoch 01:  46%|████▌     | 2222/4850 [10:37<12:27,  3.51it/s]

Epoch: 2, Loss: 2.5472347736358643


Processing epoch 01:  46%|████▌     | 2223/4850 [10:37<12:27,  3.51it/s]

Epoch: 2, Loss: 2.8286848068237305


Processing epoch 01:  46%|████▌     | 2224/4850 [10:37<12:25,  3.52it/s]

Epoch: 2, Loss: 2.361427068710327


Processing epoch 01:  46%|████▌     | 2225/4850 [10:37<12:22,  3.53it/s]

Epoch: 2, Loss: 3.0557894706726074


Processing epoch 01:  46%|████▌     | 2226/4850 [10:38<12:21,  3.54it/s]

Epoch: 2, Loss: 2.972280979156494


Processing epoch 01:  46%|████▌     | 2227/4850 [10:38<12:26,  3.52it/s]

Epoch: 2, Loss: 2.5134568214416504


Processing epoch 01:  46%|████▌     | 2228/4850 [10:38<12:26,  3.51it/s]

Epoch: 2, Loss: 2.442321300506592


Processing epoch 01:  46%|████▌     | 2229/4850 [10:39<12:33,  3.48it/s]

Epoch: 2, Loss: 2.612079620361328


Processing epoch 01:  46%|████▌     | 2230/4850 [10:39<12:30,  3.49it/s]

Epoch: 2, Loss: 2.320253849029541


Processing epoch 01:  46%|████▌     | 2231/4850 [10:39<12:26,  3.51it/s]

Epoch: 2, Loss: 3.397674083709717


Processing epoch 01:  46%|████▌     | 2232/4850 [10:39<12:26,  3.51it/s]

Epoch: 2, Loss: 2.7726712226867676


Processing epoch 01:  46%|████▌     | 2233/4850 [10:40<12:25,  3.51it/s]

Epoch: 2, Loss: 2.963334798812866


Processing epoch 01:  46%|████▌     | 2234/4850 [10:40<12:30,  3.49it/s]

Epoch: 2, Loss: 2.4781746864318848


Processing epoch 01:  46%|████▌     | 2235/4850 [10:40<12:26,  3.50it/s]

Epoch: 2, Loss: 3.0850753784179688


Processing epoch 01:  46%|████▌     | 2236/4850 [10:41<12:26,  3.50it/s]

Epoch: 2, Loss: 2.381258726119995


Processing epoch 01:  46%|████▌     | 2237/4850 [10:41<12:20,  3.53it/s]

Epoch: 2, Loss: 2.7763006687164307


Processing epoch 01:  46%|████▌     | 2238/4850 [10:41<12:29,  3.49it/s]

Epoch: 2, Loss: 2.9640583992004395


Processing epoch 01:  46%|████▌     | 2239/4850 [10:41<12:28,  3.49it/s]

Epoch: 2, Loss: 2.7702550888061523


Processing epoch 01:  46%|████▌     | 2240/4850 [10:42<12:26,  3.50it/s]

Epoch: 2, Loss: 2.7599639892578125


Processing epoch 01:  46%|████▌     | 2241/4850 [10:42<12:30,  3.48it/s]

Epoch: 2, Loss: 2.424435615539551


Processing epoch 01:  46%|████▌     | 2242/4850 [10:42<12:37,  3.45it/s]

Epoch: 2, Loss: 2.3342669010162354


Processing epoch 01:  46%|████▌     | 2243/4850 [10:43<12:48,  3.39it/s]

Epoch: 2, Loss: 2.5430290699005127


Processing epoch 01:  46%|████▋     | 2244/4850 [10:43<12:56,  3.36it/s]

Epoch: 2, Loss: 2.641416072845459


Processing epoch 01:  46%|████▋     | 2245/4850 [10:43<12:58,  3.34it/s]

Epoch: 2, Loss: 2.3182005882263184


Processing epoch 01:  46%|████▋     | 2246/4850 [10:43<12:56,  3.35it/s]

Epoch: 2, Loss: 2.1201329231262207


Processing epoch 01:  46%|████▋     | 2247/4850 [10:44<12:57,  3.35it/s]

Epoch: 2, Loss: 3.1147735118865967


Processing epoch 01:  46%|████▋     | 2248/4850 [10:44<13:00,  3.33it/s]

Epoch: 2, Loss: 2.4351272583007812


Processing epoch 01:  46%|████▋     | 2249/4850 [10:44<12:53,  3.36it/s]

Epoch: 2, Loss: 2.4461708068847656


Processing epoch 01:  46%|████▋     | 2250/4850 [10:45<13:05,  3.31it/s]

Epoch: 2, Loss: 2.2821903228759766


Processing epoch 01:  46%|████▋     | 2251/4850 [10:45<13:04,  3.31it/s]

Epoch: 2, Loss: 2.493375301361084


Processing epoch 01:  46%|████▋     | 2252/4850 [10:45<13:12,  3.28it/s]

Epoch: 2, Loss: 2.163142204284668


Processing epoch 01:  46%|████▋     | 2253/4850 [10:46<13:13,  3.27it/s]

Epoch: 2, Loss: 2.3193066120147705


Processing epoch 01:  46%|████▋     | 2254/4850 [10:46<13:02,  3.32it/s]

Epoch: 2, Loss: 2.8977086544036865


Processing epoch 01:  46%|████▋     | 2255/4850 [10:46<13:03,  3.31it/s]

Epoch: 2, Loss: 2.2517080307006836


Processing epoch 01:  47%|████▋     | 2256/4850 [10:46<13:03,  3.31it/s]

Epoch: 2, Loss: 2.5964651107788086


Processing epoch 01:  47%|████▋     | 2257/4850 [10:47<12:51,  3.36it/s]

Epoch: 2, Loss: 2.7135939598083496


Processing epoch 01:  47%|████▋     | 2258/4850 [10:47<12:38,  3.42it/s]

Epoch: 2, Loss: 2.1166300773620605


Processing epoch 01:  47%|████▋     | 2259/4850 [10:47<12:31,  3.45it/s]

Epoch: 2, Loss: 2.851123332977295


Processing epoch 01:  47%|████▋     | 2260/4850 [10:48<12:26,  3.47it/s]

Epoch: 2, Loss: 2.471937894821167


Processing epoch 01:  47%|████▋     | 2261/4850 [10:48<12:19,  3.50it/s]

Epoch: 2, Loss: 2.127514362335205


Processing epoch 01:  47%|████▋     | 2262/4850 [10:48<12:20,  3.50it/s]

Epoch: 2, Loss: 3.0161397457122803


Processing epoch 01:  47%|████▋     | 2263/4850 [10:48<12:26,  3.47it/s]

Epoch: 2, Loss: 2.6575093269348145


Processing epoch 01:  47%|████▋     | 2264/4850 [10:49<12:17,  3.50it/s]

Epoch: 2, Loss: 2.9743356704711914


Processing epoch 01:  47%|████▋     | 2265/4850 [10:49<12:12,  3.53it/s]

Epoch: 2, Loss: 2.5529465675354004


Processing epoch 01:  47%|████▋     | 2266/4850 [10:49<12:11,  3.53it/s]

Epoch: 2, Loss: 2.495370864868164


Processing epoch 01:  47%|████▋     | 2267/4850 [10:50<12:11,  3.53it/s]

Epoch: 2, Loss: 2.595693349838257


Processing epoch 01:  47%|████▋     | 2268/4850 [10:50<12:10,  3.53it/s]

Epoch: 2, Loss: 2.0351369380950928


Processing epoch 01:  47%|████▋     | 2269/4850 [10:50<12:10,  3.53it/s]

Epoch: 2, Loss: 3.102586269378662


Processing epoch 01:  47%|████▋     | 2270/4850 [10:50<12:09,  3.53it/s]

Epoch: 2, Loss: 1.9002091884613037


Processing epoch 01:  47%|████▋     | 2271/4850 [10:51<12:09,  3.53it/s]

Epoch: 2, Loss: 2.242560625076294


Processing epoch 01:  47%|████▋     | 2272/4850 [10:51<12:09,  3.53it/s]

Epoch: 2, Loss: 2.4674060344696045


Processing epoch 01:  47%|████▋     | 2273/4850 [10:51<12:08,  3.54it/s]

Epoch: 2, Loss: 2.736391067504883


Processing epoch 01:  47%|████▋     | 2274/4850 [10:52<12:11,  3.52it/s]

Epoch: 2, Loss: 2.6591453552246094


Processing epoch 01:  47%|████▋     | 2275/4850 [10:52<12:11,  3.52it/s]

Epoch: 2, Loss: 2.502260684967041


Processing epoch 01:  47%|████▋     | 2276/4850 [10:52<12:14,  3.50it/s]

Epoch: 2, Loss: 2.294898748397827


Processing epoch 01:  47%|████▋     | 2277/4850 [10:52<12:12,  3.51it/s]

Epoch: 2, Loss: 2.4578170776367188


Processing epoch 01:  47%|████▋     | 2278/4850 [10:53<12:08,  3.53it/s]

Epoch: 2, Loss: 2.8630571365356445


Processing epoch 01:  47%|████▋     | 2279/4850 [10:53<12:07,  3.53it/s]

Epoch: 2, Loss: 2.225313186645508


Processing epoch 01:  47%|████▋     | 2280/4850 [10:53<12:07,  3.53it/s]

Epoch: 2, Loss: 2.058668851852417


Processing epoch 01:  47%|████▋     | 2281/4850 [10:54<12:16,  3.49it/s]

Epoch: 2, Loss: 2.5002238750457764


Processing epoch 01:  47%|████▋     | 2282/4850 [10:54<12:15,  3.49it/s]

Epoch: 2, Loss: 3.145545244216919


Processing epoch 01:  47%|████▋     | 2283/4850 [10:54<12:09,  3.52it/s]

Epoch: 2, Loss: 3.002642869949341


Processing epoch 01:  47%|████▋     | 2284/4850 [10:54<12:07,  3.52it/s]

Epoch: 2, Loss: 2.5365281105041504


Processing epoch 01:  47%|████▋     | 2285/4850 [10:55<12:08,  3.52it/s]

Epoch: 2, Loss: 1.9826297760009766


Processing epoch 01:  47%|████▋     | 2286/4850 [10:55<12:07,  3.52it/s]

Epoch: 2, Loss: 2.490267038345337


Processing epoch 01:  47%|████▋     | 2287/4850 [10:55<12:07,  3.52it/s]

Epoch: 2, Loss: 2.902104377746582


Processing epoch 01:  47%|████▋     | 2288/4850 [10:56<12:17,  3.47it/s]

Epoch: 2, Loss: 2.5136501789093018


Processing epoch 01:  47%|████▋     | 2289/4850 [10:56<12:11,  3.50it/s]

Epoch: 2, Loss: 2.699916124343872


Processing epoch 01:  47%|████▋     | 2290/4850 [10:56<12:07,  3.52it/s]

Epoch: 2, Loss: 2.2685160636901855


Processing epoch 01:  47%|████▋     | 2291/4850 [10:56<12:05,  3.53it/s]

Epoch: 2, Loss: 2.4430253505706787


Processing epoch 01:  47%|████▋     | 2292/4850 [10:57<12:20,  3.46it/s]

Epoch: 2, Loss: 3.3984034061431885


Processing epoch 01:  47%|████▋     | 2293/4850 [10:57<12:31,  3.40it/s]

Epoch: 2, Loss: 2.710627555847168


Processing epoch 01:  47%|████▋     | 2294/4850 [10:57<12:27,  3.42it/s]

Epoch: 2, Loss: 2.7114853858947754


Processing epoch 01:  47%|████▋     | 2295/4850 [10:58<12:21,  3.45it/s]

Epoch: 2, Loss: 3.1881046295166016


Processing epoch 01:  47%|████▋     | 2296/4850 [10:58<12:19,  3.45it/s]

Epoch: 2, Loss: 2.6907248497009277


Processing epoch 01:  47%|████▋     | 2297/4850 [10:58<12:26,  3.42it/s]

Epoch: 2, Loss: 2.8927810192108154


Processing epoch 01:  47%|████▋     | 2298/4850 [10:58<12:16,  3.47it/s]

Epoch: 2, Loss: 2.5807065963745117


Processing epoch 01:  47%|████▋     | 2299/4850 [10:59<12:16,  3.46it/s]

Epoch: 2, Loss: 1.9858267307281494


Processing epoch 01:  47%|████▋     | 2300/4850 [10:59<12:23,  3.43it/s]

Epoch: 2, Loss: 2.727085828781128


Processing epoch 01:  47%|████▋     | 2301/4850 [10:59<12:40,  3.35it/s]

Epoch: 2, Loss: 2.609846830368042


Processing epoch 01:  47%|████▋     | 2302/4850 [11:00<12:30,  3.39it/s]

Epoch: 2, Loss: 2.5228500366210938


Processing epoch 01:  47%|████▋     | 2303/4850 [11:00<12:36,  3.37it/s]

Epoch: 2, Loss: 2.522007703781128


Processing epoch 01:  48%|████▊     | 2304/4850 [11:00<12:42,  3.34it/s]

Epoch: 2, Loss: 2.3197760581970215


Processing epoch 01:  48%|████▊     | 2305/4850 [11:01<12:43,  3.33it/s]

Epoch: 2, Loss: 3.0523529052734375


Processing epoch 01:  48%|████▊     | 2306/4850 [11:01<12:52,  3.29it/s]

Epoch: 2, Loss: 2.3191022872924805


Processing epoch 01:  48%|████▊     | 2307/4850 [11:01<12:49,  3.31it/s]

Epoch: 2, Loss: 2.8257808685302734


Processing epoch 01:  48%|████▊     | 2308/4850 [11:01<12:33,  3.37it/s]

Epoch: 2, Loss: 2.0731663703918457


Processing epoch 01:  48%|████▊     | 2309/4850 [11:02<12:26,  3.40it/s]

Epoch: 2, Loss: 2.196023464202881


Processing epoch 01:  48%|████▊     | 2310/4850 [11:02<12:24,  3.41it/s]

Epoch: 2, Loss: 2.573962926864624


Processing epoch 01:  48%|████▊     | 2311/4850 [11:02<12:21,  3.42it/s]

Epoch: 2, Loss: 2.06608247756958


Processing epoch 01:  48%|████▊     | 2312/4850 [11:03<12:13,  3.46it/s]

Epoch: 2, Loss: 2.524941921234131


Processing epoch 01:  48%|████▊     | 2313/4850 [11:03<12:10,  3.47it/s]

Epoch: 2, Loss: 2.775251865386963


Processing epoch 01:  48%|████▊     | 2314/4850 [11:03<12:08,  3.48it/s]

Epoch: 2, Loss: 2.491445302963257


Processing epoch 01:  48%|████▊     | 2315/4850 [11:03<12:06,  3.49it/s]

Epoch: 2, Loss: 2.2078804969787598


Processing epoch 01:  48%|████▊     | 2316/4850 [11:04<12:03,  3.50it/s]

Epoch: 2, Loss: 2.1966686248779297


Processing epoch 01:  48%|████▊     | 2317/4850 [11:04<11:58,  3.53it/s]

Epoch: 2, Loss: 3.6306681632995605


Processing epoch 01:  48%|████▊     | 2318/4850 [11:04<12:01,  3.51it/s]

Epoch: 2, Loss: 3.1872005462646484


Processing epoch 01:  48%|████▊     | 2319/4850 [11:05<11:58,  3.52it/s]

Epoch: 2, Loss: 2.5316333770751953


Processing epoch 01:  48%|████▊     | 2320/4850 [11:05<12:03,  3.49it/s]

Epoch: 2, Loss: 2.310929298400879


Processing epoch 01:  48%|████▊     | 2321/4850 [11:05<11:59,  3.51it/s]

Epoch: 2, Loss: 2.6024951934814453


Processing epoch 01:  48%|████▊     | 2322/4850 [11:05<11:59,  3.51it/s]

Epoch: 2, Loss: 2.330669403076172


Processing epoch 01:  48%|████▊     | 2323/4850 [11:06<12:07,  3.47it/s]

Epoch: 2, Loss: 2.446140766143799


Processing epoch 01:  48%|████▊     | 2324/4850 [11:06<12:06,  3.48it/s]

Epoch: 2, Loss: 2.251124382019043


Processing epoch 01:  48%|████▊     | 2325/4850 [11:06<12:20,  3.41it/s]

Epoch: 2, Loss: 2.3522870540618896


Processing epoch 01:  48%|████▊     | 2326/4850 [11:07<12:31,  3.36it/s]

Epoch: 2, Loss: 2.465254068374634


Processing epoch 01:  48%|████▊     | 2327/4850 [11:07<12:22,  3.40it/s]

Epoch: 2, Loss: 2.4236321449279785


Processing epoch 01:  48%|████▊     | 2328/4850 [11:07<12:27,  3.37it/s]

Epoch: 2, Loss: 2.34298038482666


Processing epoch 01:  48%|████▊     | 2329/4850 [11:08<12:15,  3.43it/s]

Epoch: 2, Loss: 2.51175594329834


Processing epoch 01:  48%|████▊     | 2330/4850 [11:08<12:19,  3.41it/s]

Epoch: 2, Loss: 2.4342174530029297


Processing epoch 01:  48%|████▊     | 2331/4850 [11:08<12:27,  3.37it/s]

Epoch: 2, Loss: 2.739642858505249


Processing epoch 01:  48%|████▊     | 2332/4850 [11:08<12:30,  3.35it/s]

Epoch: 2, Loss: 2.530863046646118


Processing epoch 01:  48%|████▊     | 2333/4850 [11:09<12:36,  3.33it/s]

Epoch: 2, Loss: 2.8671061992645264


Processing epoch 01:  48%|████▊     | 2334/4850 [11:09<12:43,  3.29it/s]

Epoch: 2, Loss: 2.900346279144287


Processing epoch 01:  48%|████▊     | 2335/4850 [11:09<12:33,  3.34it/s]

Epoch: 2, Loss: 2.9706735610961914


Processing epoch 01:  48%|████▊     | 2336/4850 [11:10<12:29,  3.35it/s]

Epoch: 2, Loss: 2.0716803073883057


Processing epoch 01:  48%|████▊     | 2337/4850 [11:10<12:24,  3.37it/s]

Epoch: 2, Loss: 2.9382357597351074


Processing epoch 01:  48%|████▊     | 2338/4850 [11:10<12:25,  3.37it/s]

Epoch: 2, Loss: 2.1510167121887207


Processing epoch 01:  48%|████▊     | 2339/4850 [11:10<12:14,  3.42it/s]

Epoch: 2, Loss: 2.8854904174804688


Processing epoch 01:  48%|████▊     | 2340/4850 [11:11<12:12,  3.43it/s]

Epoch: 2, Loss: 2.256683588027954


Processing epoch 01:  48%|████▊     | 2341/4850 [11:11<12:03,  3.47it/s]

Epoch: 2, Loss: 2.689347982406616


Processing epoch 01:  48%|████▊     | 2342/4850 [11:11<12:10,  3.43it/s]

Epoch: 2, Loss: 2.89296293258667


Processing epoch 01:  48%|████▊     | 2343/4850 [11:12<12:08,  3.44it/s]

Epoch: 2, Loss: 2.759049892425537


Processing epoch 01:  48%|████▊     | 2344/4850 [11:12<12:25,  3.36it/s]

Epoch: 2, Loss: 2.8616995811462402


Processing epoch 01:  48%|████▊     | 2345/4850 [11:12<12:27,  3.35it/s]

Epoch: 2, Loss: 2.4723827838897705


Processing epoch 01:  48%|████▊     | 2346/4850 [11:13<12:26,  3.35it/s]

Epoch: 2, Loss: 2.547420024871826


Processing epoch 01:  48%|████▊     | 2347/4850 [11:13<12:25,  3.36it/s]

Epoch: 2, Loss: 2.465848445892334


Processing epoch 01:  48%|████▊     | 2348/4850 [11:13<12:14,  3.41it/s]

Epoch: 2, Loss: 2.568037509918213


Processing epoch 01:  48%|████▊     | 2349/4850 [11:13<12:04,  3.45it/s]

Epoch: 2, Loss: 2.5297255516052246


Processing epoch 01:  48%|████▊     | 2350/4850 [11:14<12:11,  3.42it/s]

Epoch: 2, Loss: 2.3431766033172607


Processing epoch 01:  48%|████▊     | 2351/4850 [11:14<12:19,  3.38it/s]

Epoch: 2, Loss: 2.048920154571533


Processing epoch 01:  48%|████▊     | 2352/4850 [11:14<12:21,  3.37it/s]

Epoch: 2, Loss: 2.5419259071350098


Processing epoch 01:  49%|████▊     | 2353/4850 [11:15<12:17,  3.38it/s]

Epoch: 2, Loss: 2.2773494720458984


Processing epoch 01:  49%|████▊     | 2354/4850 [11:15<12:19,  3.38it/s]

Epoch: 2, Loss: 2.6564369201660156


Processing epoch 01:  49%|████▊     | 2355/4850 [11:15<12:15,  3.39it/s]

Epoch: 2, Loss: 3.4596776962280273


Processing epoch 01:  49%|████▊     | 2356/4850 [11:15<12:11,  3.41it/s]

Epoch: 2, Loss: 2.2127983570098877


Processing epoch 01:  49%|████▊     | 2357/4850 [11:16<12:28,  3.33it/s]

Epoch: 2, Loss: 2.8736629486083984


Processing epoch 01:  49%|████▊     | 2358/4850 [11:16<12:11,  3.41it/s]

Epoch: 2, Loss: 2.6709072589874268


Processing epoch 01:  49%|████▊     | 2359/4850 [11:16<12:04,  3.44it/s]

Epoch: 2, Loss: 2.5350611209869385


Processing epoch 01:  49%|████▊     | 2360/4850 [11:17<12:00,  3.45it/s]

Epoch: 2, Loss: 2.6854052543640137


Processing epoch 01:  49%|████▊     | 2361/4850 [11:17<11:52,  3.49it/s]

Epoch: 2, Loss: 2.6189260482788086


Processing epoch 01:  49%|████▊     | 2362/4850 [11:17<11:52,  3.49it/s]

Epoch: 2, Loss: 2.3140716552734375


Processing epoch 01:  49%|████▊     | 2363/4850 [11:18<11:49,  3.51it/s]

Epoch: 2, Loss: 2.5512495040893555


Processing epoch 01:  49%|████▊     | 2364/4850 [11:18<11:45,  3.52it/s]

Epoch: 2, Loss: 2.6752405166625977


Processing epoch 01:  49%|████▉     | 2365/4850 [11:18<11:49,  3.50it/s]

Epoch: 2, Loss: 2.0671300888061523


Processing epoch 01:  49%|████▉     | 2366/4850 [11:18<11:45,  3.52it/s]

Epoch: 2, Loss: 2.5882279872894287


Processing epoch 01:  49%|████▉     | 2367/4850 [11:19<11:45,  3.52it/s]

Epoch: 2, Loss: 2.1657636165618896


Processing epoch 01:  49%|████▉     | 2368/4850 [11:19<11:51,  3.49it/s]

Epoch: 2, Loss: 2.8707728385925293


Processing epoch 01:  49%|████▉     | 2369/4850 [11:19<11:53,  3.48it/s]

Epoch: 2, Loss: 2.6290149688720703


Processing epoch 01:  49%|████▉     | 2370/4850 [11:20<11:48,  3.50it/s]

Epoch: 2, Loss: 2.2428224086761475


Processing epoch 01:  49%|████▉     | 2371/4850 [11:20<11:45,  3.51it/s]

Epoch: 2, Loss: 2.3665082454681396


Processing epoch 01:  49%|████▉     | 2372/4850 [11:20<11:45,  3.51it/s]

Epoch: 2, Loss: 2.195218563079834


Processing epoch 01:  49%|████▉     | 2373/4850 [11:20<11:44,  3.51it/s]

Epoch: 2, Loss: 2.3516249656677246


Processing epoch 01:  49%|████▉     | 2374/4850 [11:21<11:43,  3.52it/s]

Epoch: 2, Loss: 3.01363205909729


Processing epoch 01:  49%|████▉     | 2375/4850 [11:21<11:49,  3.49it/s]

Epoch: 2, Loss: 3.808473587036133


Processing epoch 01:  49%|████▉     | 2376/4850 [11:21<11:48,  3.49it/s]

Epoch: 2, Loss: 2.699118137359619


Processing epoch 01:  49%|████▉     | 2377/4850 [11:22<11:45,  3.51it/s]

Epoch: 2, Loss: 2.4423282146453857


Processing epoch 01:  49%|████▉     | 2378/4850 [11:22<11:41,  3.53it/s]

Epoch: 2, Loss: 2.5706725120544434


Processing epoch 01:  49%|████▉     | 2379/4850 [11:22<11:41,  3.52it/s]

Epoch: 2, Loss: 2.6307106018066406


Processing epoch 01:  49%|████▉     | 2380/4850 [11:22<11:41,  3.52it/s]

Epoch: 2, Loss: 2.3901009559631348


Processing epoch 01:  49%|████▉     | 2381/4850 [11:23<11:41,  3.52it/s]

Epoch: 2, Loss: 2.7121517658233643


Processing epoch 01:  49%|████▉     | 2382/4850 [11:23<11:41,  3.52it/s]

Epoch: 2, Loss: 2.729022741317749


Processing epoch 01:  49%|████▉     | 2383/4850 [11:23<11:41,  3.52it/s]

Epoch: 2, Loss: 2.113863468170166


Processing epoch 01:  49%|████▉     | 2384/4850 [11:23<11:40,  3.52it/s]

Epoch: 2, Loss: 2.2235121726989746


Processing epoch 01:  49%|████▉     | 2385/4850 [11:24<11:42,  3.51it/s]

Epoch: 2, Loss: 2.463602066040039


Processing epoch 01:  49%|████▉     | 2386/4850 [11:24<11:47,  3.48it/s]

Epoch: 2, Loss: 4.678455352783203


Processing epoch 01:  49%|████▉     | 2387/4850 [11:24<11:44,  3.50it/s]

Epoch: 2, Loss: 2.628842830657959


Processing epoch 01:  49%|████▉     | 2388/4850 [11:25<11:43,  3.50it/s]

Epoch: 2, Loss: 2.7900748252868652


Processing epoch 01:  49%|████▉     | 2389/4850 [11:25<11:42,  3.50it/s]

Epoch: 2, Loss: 2.8234496116638184


Processing epoch 01:  49%|████▉     | 2390/4850 [11:25<11:45,  3.49it/s]

Epoch: 2, Loss: 1.9739248752593994


Processing epoch 01:  49%|████▉     | 2391/4850 [11:25<11:42,  3.50it/s]

Epoch: 2, Loss: 2.7736024856567383


Processing epoch 01:  49%|████▉     | 2392/4850 [11:26<11:39,  3.51it/s]

Epoch: 2, Loss: 2.7453041076660156


Processing epoch 01:  49%|████▉     | 2393/4850 [11:26<11:39,  3.51it/s]

Epoch: 2, Loss: 3.2717370986938477


Processing epoch 01:  49%|████▉     | 2394/4850 [11:26<11:39,  3.51it/s]

Epoch: 2, Loss: 2.9682226181030273


Processing epoch 01:  49%|████▉     | 2395/4850 [11:27<11:35,  3.53it/s]

Epoch: 2, Loss: 2.719102382659912


Processing epoch 01:  49%|████▉     | 2396/4850 [11:27<11:33,  3.54it/s]

Epoch: 2, Loss: 2.649986743927002


Processing epoch 01:  49%|████▉     | 2397/4850 [11:27<11:45,  3.48it/s]

Epoch: 2, Loss: 2.225881576538086


Processing epoch 01:  49%|████▉     | 2398/4850 [11:27<11:44,  3.48it/s]

Epoch: 2, Loss: 2.701115131378174


Processing epoch 01:  49%|████▉     | 2399/4850 [11:28<11:52,  3.44it/s]

Epoch: 2, Loss: 2.720405101776123


Processing epoch 01:  49%|████▉     | 2400/4850 [11:28<11:44,  3.48it/s]

Epoch: 2, Loss: 2.8102452754974365


Processing epoch 01:  50%|████▉     | 2401/4850 [11:28<12:04,  3.38it/s]

Epoch: 2, Loss: 2.235675096511841


Processing epoch 01:  50%|████▉     | 2402/4850 [11:29<12:08,  3.36it/s]

Epoch: 2, Loss: 2.52492618560791


Processing epoch 01:  50%|████▉     | 2403/4850 [11:29<12:03,  3.38it/s]

Epoch: 2, Loss: 2.6556386947631836


Processing epoch 01:  50%|████▉     | 2404/4850 [11:29<12:07,  3.36it/s]

Epoch: 2, Loss: 2.0510706901550293


Processing epoch 01:  50%|████▉     | 2405/4850 [11:30<12:05,  3.37it/s]

Epoch: 2, Loss: 2.7688181400299072


Processing epoch 01:  50%|████▉     | 2406/4850 [11:30<11:59,  3.39it/s]

Epoch: 2, Loss: 2.6459574699401855


Processing epoch 01:  50%|████▉     | 2407/4850 [11:30<11:46,  3.46it/s]

Epoch: 2, Loss: 3.3384642601013184


Processing epoch 01:  50%|████▉     | 2408/4850 [11:30<11:51,  3.43it/s]

Epoch: 2, Loss: 2.620278835296631


Processing epoch 01:  50%|████▉     | 2409/4850 [11:31<12:00,  3.39it/s]

Epoch: 2, Loss: 2.4772701263427734


Processing epoch 01:  50%|████▉     | 2410/4850 [11:31<11:48,  3.45it/s]

Epoch: 2, Loss: 2.848886489868164


Processing epoch 01:  50%|████▉     | 2411/4850 [11:31<11:42,  3.47it/s]

Epoch: 2, Loss: 2.441877603530884


Processing epoch 01:  50%|████▉     | 2412/4850 [11:32<11:37,  3.50it/s]

Epoch: 2, Loss: 4.13909912109375


Processing epoch 01:  50%|████▉     | 2413/4850 [11:32<11:34,  3.51it/s]

Epoch: 2, Loss: 2.4989805221557617


Processing epoch 01:  50%|████▉     | 2414/4850 [11:32<11:31,  3.52it/s]

Epoch: 2, Loss: 2.361490249633789


Processing epoch 01:  50%|████▉     | 2415/4850 [11:32<11:30,  3.53it/s]

Epoch: 2, Loss: 3.1125080585479736


Processing epoch 01:  50%|████▉     | 2416/4850 [11:33<11:32,  3.51it/s]

Epoch: 2, Loss: 2.122572422027588


Processing epoch 01:  50%|████▉     | 2417/4850 [11:33<11:31,  3.52it/s]

Epoch: 2, Loss: 2.166701316833496


Processing epoch 01:  50%|████▉     | 2418/4850 [11:33<11:28,  3.53it/s]

Epoch: 2, Loss: 2.302353858947754


Processing epoch 01:  50%|████▉     | 2419/4850 [11:34<11:35,  3.50it/s]

Epoch: 2, Loss: 3.717813730239868


Processing epoch 01:  50%|████▉     | 2420/4850 [11:34<11:31,  3.51it/s]

Epoch: 2, Loss: 2.939335346221924


Processing epoch 01:  50%|████▉     | 2421/4850 [11:34<11:30,  3.52it/s]

Epoch: 2, Loss: 2.351299285888672


Processing epoch 01:  50%|████▉     | 2422/4850 [11:34<11:27,  3.53it/s]

Epoch: 2, Loss: 2.7098140716552734


Processing epoch 01:  50%|████▉     | 2423/4850 [11:35<11:33,  3.50it/s]

Epoch: 2, Loss: 2.7493228912353516


Processing epoch 01:  50%|████▉     | 2424/4850 [11:35<11:30,  3.51it/s]

Epoch: 2, Loss: 2.7564525604248047


Processing epoch 01:  50%|█████     | 2425/4850 [11:35<11:29,  3.52it/s]

Epoch: 2, Loss: 2.3613290786743164


Processing epoch 01:  50%|█████     | 2426/4850 [11:36<11:29,  3.52it/s]

Epoch: 2, Loss: 1.990990161895752


Processing epoch 01:  50%|█████     | 2427/4850 [11:36<11:25,  3.53it/s]

Epoch: 2, Loss: 2.2539453506469727


Processing epoch 01:  50%|█████     | 2428/4850 [11:36<11:25,  3.53it/s]

Epoch: 2, Loss: 2.4404098987579346


Processing epoch 01:  50%|█████     | 2429/4850 [11:36<11:28,  3.52it/s]

Epoch: 2, Loss: 2.2090203762054443


Processing epoch 01:  50%|█████     | 2430/4850 [11:37<11:35,  3.48it/s]

Epoch: 2, Loss: 2.6964714527130127


Processing epoch 01:  50%|█████     | 2431/4850 [11:37<11:38,  3.46it/s]

Epoch: 2, Loss: 2.356369972229004


Processing epoch 01:  50%|█████     | 2432/4850 [11:37<11:36,  3.47it/s]

Epoch: 2, Loss: 2.11545467376709


Processing epoch 01:  50%|█████     | 2433/4850 [11:38<11:37,  3.46it/s]

Epoch: 2, Loss: 2.619473695755005


Processing epoch 01:  50%|█████     | 2434/4850 [11:38<11:34,  3.48it/s]

Epoch: 2, Loss: 3.149334192276001


Processing epoch 01:  50%|█████     | 2435/4850 [11:38<11:28,  3.51it/s]

Epoch: 2, Loss: 3.5835063457489014


Processing epoch 01:  50%|█████     | 2436/4850 [11:38<11:25,  3.52it/s]

Epoch: 2, Loss: 3.133105754852295


Processing epoch 01:  50%|█████     | 2437/4850 [11:39<11:26,  3.51it/s]

Epoch: 2, Loss: 2.4286346435546875


Processing epoch 01:  50%|█████     | 2438/4850 [11:39<11:26,  3.51it/s]

Epoch: 2, Loss: 2.524580955505371


Processing epoch 01:  50%|█████     | 2439/4850 [11:39<11:22,  3.53it/s]

Epoch: 2, Loss: 2.571833372116089


Processing epoch 01:  50%|█████     | 2440/4850 [11:40<11:20,  3.54it/s]

Epoch: 2, Loss: 2.2268002033233643


Processing epoch 01:  50%|█████     | 2441/4850 [11:40<11:20,  3.54it/s]

Epoch: 2, Loss: 2.9073023796081543


Processing epoch 01:  50%|█████     | 2442/4850 [11:40<11:21,  3.53it/s]

Epoch: 2, Loss: 2.566793203353882


Processing epoch 01:  50%|█████     | 2443/4850 [11:40<11:21,  3.53it/s]

Epoch: 2, Loss: 3.2754099369049072


Processing epoch 01:  50%|█████     | 2444/4850 [11:41<11:24,  3.52it/s]

Epoch: 2, Loss: 2.3064074516296387


Processing epoch 01:  50%|█████     | 2445/4850 [11:41<11:23,  3.52it/s]

Epoch: 2, Loss: 2.6989760398864746


Processing epoch 01:  50%|█████     | 2446/4850 [11:41<11:22,  3.52it/s]

Epoch: 2, Loss: 2.389237642288208


Processing epoch 01:  50%|█████     | 2447/4850 [11:42<11:36,  3.45it/s]

Epoch: 2, Loss: 2.189251661300659


Processing epoch 01:  50%|█████     | 2448/4850 [11:42<11:42,  3.42it/s]

Epoch: 2, Loss: 2.437791585922241


Processing epoch 01:  50%|█████     | 2449/4850 [11:42<11:39,  3.43it/s]

Epoch: 2, Loss: 2.312141180038452


Processing epoch 01:  51%|█████     | 2450/4850 [11:42<11:49,  3.38it/s]

Epoch: 2, Loss: 2.211566209793091


Processing epoch 01:  51%|█████     | 2451/4850 [11:43<11:55,  3.35it/s]

Epoch: 2, Loss: 2.4385786056518555


Processing epoch 01:  51%|█████     | 2452/4850 [11:43<11:58,  3.34it/s]

Epoch: 2, Loss: 2.584472894668579


Processing epoch 01:  51%|█████     | 2453/4850 [11:43<11:53,  3.36it/s]

Epoch: 2, Loss: 2.700998306274414


Processing epoch 01:  51%|█████     | 2454/4850 [11:44<11:48,  3.38it/s]

Epoch: 2, Loss: 3.1928374767303467


Processing epoch 01:  51%|█████     | 2455/4850 [11:44<11:50,  3.37it/s]

Epoch: 2, Loss: 2.5540318489074707


Processing epoch 01:  51%|█████     | 2456/4850 [11:44<11:55,  3.35it/s]

Epoch: 2, Loss: 2.513134002685547


Processing epoch 01:  51%|█████     | 2457/4850 [11:45<11:57,  3.34it/s]

Epoch: 2, Loss: 2.5807700157165527


Processing epoch 01:  51%|█████     | 2458/4850 [11:45<12:02,  3.31it/s]

Epoch: 2, Loss: 2.7850794792175293


Processing epoch 01:  51%|█████     | 2459/4850 [11:45<11:59,  3.32it/s]

Epoch: 2, Loss: 2.6832268238067627


Processing epoch 01:  51%|█████     | 2460/4850 [11:45<11:51,  3.36it/s]

Epoch: 2, Loss: 2.5949273109436035


Processing epoch 01:  51%|█████     | 2461/4850 [11:46<11:43,  3.40it/s]

Epoch: 2, Loss: 2.8144659996032715


Processing epoch 01:  51%|█████     | 2462/4850 [11:46<11:36,  3.43it/s]

Epoch: 2, Loss: 2.4579453468322754


Processing epoch 01:  51%|█████     | 2463/4850 [11:46<11:27,  3.47it/s]

Epoch: 2, Loss: 2.6725544929504395


Processing epoch 01:  51%|█████     | 2464/4850 [11:47<11:24,  3.49it/s]

Epoch: 2, Loss: 3.2776670455932617


Processing epoch 01:  51%|█████     | 2465/4850 [11:47<11:24,  3.48it/s]

Epoch: 2, Loss: 2.5442333221435547


Processing epoch 01:  51%|█████     | 2466/4850 [11:47<11:28,  3.47it/s]

Epoch: 2, Loss: 2.135089874267578


Processing epoch 01:  51%|█████     | 2467/4850 [11:47<11:21,  3.49it/s]

Epoch: 2, Loss: 2.1608524322509766


Processing epoch 01:  51%|█████     | 2468/4850 [11:48<11:19,  3.51it/s]

Epoch: 2, Loss: 2.7627549171447754


Processing epoch 01:  51%|█████     | 2469/4850 [11:48<11:16,  3.52it/s]

Epoch: 2, Loss: 2.3835926055908203


Processing epoch 01:  51%|█████     | 2470/4850 [11:48<11:13,  3.54it/s]

Epoch: 2, Loss: 3.2214887142181396


Processing epoch 01:  51%|█████     | 2471/4850 [11:49<11:14,  3.53it/s]

Epoch: 2, Loss: 2.8034257888793945


Processing epoch 01:  51%|█████     | 2472/4850 [11:49<11:14,  3.53it/s]

Epoch: 2, Loss: 2.6691441535949707


Processing epoch 01:  51%|█████     | 2473/4850 [11:49<11:12,  3.53it/s]

Epoch: 2, Loss: 2.2986884117126465


Processing epoch 01:  51%|█████     | 2474/4850 [11:49<11:11,  3.54it/s]

Epoch: 2, Loss: 2.661372661590576


Processing epoch 01:  51%|█████     | 2475/4850 [11:50<11:14,  3.52it/s]

Epoch: 2, Loss: 3.0576834678649902


Processing epoch 01:  51%|█████     | 2476/4850 [11:50<11:11,  3.54it/s]

Epoch: 2, Loss: 2.1269335746765137


Processing epoch 01:  51%|█████     | 2477/4850 [11:50<11:15,  3.51it/s]

Epoch: 2, Loss: 2.8090291023254395


Processing epoch 01:  51%|█████     | 2478/4850 [11:51<11:13,  3.52it/s]

Epoch: 2, Loss: 2.7812039852142334


Processing epoch 01:  51%|█████     | 2479/4850 [11:51<11:11,  3.53it/s]

Epoch: 2, Loss: 2.73500919342041


Processing epoch 01:  51%|█████     | 2480/4850 [11:51<11:09,  3.54it/s]

Epoch: 2, Loss: 2.9755523204803467


Processing epoch 01:  51%|█████     | 2481/4850 [11:51<11:10,  3.54it/s]

Epoch: 2, Loss: 2.776667594909668


Processing epoch 01:  51%|█████     | 2482/4850 [11:52<11:09,  3.53it/s]

Epoch: 2, Loss: 2.5989367961883545


Processing epoch 01:  51%|█████     | 2483/4850 [11:52<11:11,  3.53it/s]

Epoch: 2, Loss: 2.676638603210449


Processing epoch 01:  51%|█████     | 2484/4850 [11:52<11:12,  3.52it/s]

Epoch: 2, Loss: 2.5211987495422363


Processing epoch 01:  51%|█████     | 2485/4850 [11:53<11:13,  3.51it/s]

Epoch: 2, Loss: 2.676086902618408


Processing epoch 01:  51%|█████▏    | 2486/4850 [11:53<11:11,  3.52it/s]

Epoch: 2, Loss: 1.842178463935852


Processing epoch 01:  51%|█████▏    | 2487/4850 [11:53<11:12,  3.51it/s]

Epoch: 2, Loss: 1.9557442665100098


Processing epoch 01:  51%|█████▏    | 2488/4850 [11:53<11:09,  3.53it/s]

Epoch: 2, Loss: 2.691499948501587


Processing epoch 01:  51%|█████▏    | 2489/4850 [11:54<11:08,  3.53it/s]

Epoch: 2, Loss: 2.8508195877075195


Processing epoch 01:  51%|█████▏    | 2490/4850 [11:54<11:06,  3.54it/s]

Epoch: 2, Loss: 2.219949245452881


Processing epoch 01:  51%|█████▏    | 2491/4850 [11:54<11:07,  3.54it/s]

Epoch: 2, Loss: 2.273460626602173


Processing epoch 01:  51%|█████▏    | 2492/4850 [11:55<11:07,  3.53it/s]

Epoch: 2, Loss: 2.497880458831787


Processing epoch 01:  51%|█████▏    | 2493/4850 [11:55<11:05,  3.54it/s]

Epoch: 2, Loss: 2.877985954284668


Processing epoch 01:  51%|█████▏    | 2494/4850 [11:55<11:05,  3.54it/s]

Epoch: 2, Loss: 2.5621752738952637


Processing epoch 01:  51%|█████▏    | 2495/4850 [11:55<11:18,  3.47it/s]

Epoch: 2, Loss: 2.4646682739257812


Processing epoch 01:  51%|█████▏    | 2496/4850 [11:56<11:19,  3.47it/s]

Epoch: 2, Loss: 2.690368175506592


Processing epoch 01:  51%|█████▏    | 2497/4850 [11:56<11:15,  3.48it/s]

Epoch: 2, Loss: 2.8063931465148926


Processing epoch 01:  52%|█████▏    | 2498/4850 [11:56<11:10,  3.51it/s]

Epoch: 2, Loss: 3.1398916244506836


Processing epoch 01:  52%|█████▏    | 2499/4850 [11:57<11:18,  3.47it/s]

Epoch: 2, Loss: 2.728912353515625


Processing epoch 01:  52%|█████▏    | 2500/4850 [11:57<11:20,  3.45it/s]

Epoch: 2, Loss: 2.455091953277588


Processing epoch 01:  52%|█████▏    | 2501/4850 [11:57<11:24,  3.43it/s]

Epoch: 2, Loss: 2.4898157119750977


Processing epoch 01:  52%|█████▏    | 2502/4850 [11:57<11:29,  3.41it/s]

Epoch: 2, Loss: 2.0812106132507324


Processing epoch 01:  52%|█████▏    | 2503/4850 [11:58<11:30,  3.40it/s]

Epoch: 2, Loss: 3.191030740737915


Processing epoch 01:  52%|█████▏    | 2504/4850 [11:58<11:34,  3.38it/s]

Epoch: 2, Loss: 3.0964906215667725


Processing epoch 01:  52%|█████▏    | 2505/4850 [11:58<11:27,  3.41it/s]

Epoch: 2, Loss: 3.096215009689331


Processing epoch 01:  52%|█████▏    | 2506/4850 [11:59<11:34,  3.37it/s]

Epoch: 2, Loss: 3.0484442710876465


Processing epoch 01:  52%|█████▏    | 2507/4850 [11:59<11:28,  3.41it/s]

Epoch: 2, Loss: 2.02341628074646


Processing epoch 01:  52%|█████▏    | 2508/4850 [11:59<11:22,  3.43it/s]

Epoch: 2, Loss: 2.459022283554077


Processing epoch 01:  52%|█████▏    | 2509/4850 [12:00<11:37,  3.36it/s]

Epoch: 2, Loss: 2.8654048442840576


Processing epoch 01:  52%|█████▏    | 2510/4850 [12:00<11:28,  3.40it/s]

Epoch: 2, Loss: 3.2342677116394043


Processing epoch 01:  52%|█████▏    | 2511/4850 [12:00<11:20,  3.44it/s]

Epoch: 2, Loss: 2.452881336212158


Processing epoch 01:  52%|█████▏    | 2512/4850 [12:00<11:14,  3.47it/s]

Epoch: 2, Loss: 3.1167025566101074


Processing epoch 01:  52%|█████▏    | 2513/4850 [12:01<11:10,  3.48it/s]

Epoch: 2, Loss: 2.2495017051696777


Processing epoch 01:  52%|█████▏    | 2514/4850 [12:01<11:08,  3.49it/s]

Epoch: 2, Loss: 2.3733625411987305


Processing epoch 01:  52%|█████▏    | 2515/4850 [12:01<11:07,  3.50it/s]

Epoch: 2, Loss: 2.3574395179748535


Processing epoch 01:  52%|█████▏    | 2516/4850 [12:01<11:04,  3.51it/s]

Epoch: 2, Loss: 2.3343734741210938


Processing epoch 01:  52%|█████▏    | 2517/4850 [12:02<11:03,  3.52it/s]

Epoch: 2, Loss: 1.9723118543624878


Processing epoch 01:  52%|█████▏    | 2518/4850 [12:02<11:06,  3.50it/s]

Epoch: 2, Loss: 2.3385848999023438


Processing epoch 01:  52%|█████▏    | 2519/4850 [12:02<11:00,  3.53it/s]

Epoch: 2, Loss: 2.3150057792663574


Processing epoch 01:  52%|█████▏    | 2520/4850 [12:03<10:59,  3.53it/s]

Epoch: 2, Loss: 2.5225160121917725


Processing epoch 01:  52%|█████▏    | 2521/4850 [12:03<11:10,  3.48it/s]

Epoch: 2, Loss: 2.3699564933776855


Processing epoch 01:  52%|█████▏    | 2522/4850 [12:03<11:05,  3.50it/s]

Epoch: 2, Loss: 3.621915340423584


Processing epoch 01:  52%|█████▏    | 2523/4850 [12:03<11:05,  3.50it/s]

Epoch: 2, Loss: 2.3999996185302734


Processing epoch 01:  52%|█████▏    | 2524/4850 [12:04<11:06,  3.49it/s]

Epoch: 2, Loss: 2.5496439933776855


Processing epoch 01:  52%|█████▏    | 2525/4850 [12:04<11:02,  3.51it/s]

Epoch: 2, Loss: 2.5245344638824463


Processing epoch 01:  52%|█████▏    | 2526/4850 [12:04<11:01,  3.51it/s]

Epoch: 2, Loss: 2.3295514583587646


Processing epoch 01:  52%|█████▏    | 2527/4850 [12:05<11:00,  3.52it/s]

Epoch: 2, Loss: 2.8329782485961914


Processing epoch 01:  52%|█████▏    | 2528/4850 [12:05<10:59,  3.52it/s]

Epoch: 2, Loss: 2.8937556743621826


Processing epoch 01:  52%|█████▏    | 2529/4850 [12:05<11:00,  3.52it/s]

Epoch: 2, Loss: 2.623316764831543


Processing epoch 01:  52%|█████▏    | 2530/4850 [12:05<10:56,  3.53it/s]

Epoch: 2, Loss: 3.205066680908203


Processing epoch 01:  52%|█████▏    | 2531/4850 [12:06<10:57,  3.53it/s]

Epoch: 2, Loss: 2.8732049465179443


Processing epoch 01:  52%|█████▏    | 2532/4850 [12:06<11:03,  3.49it/s]

Epoch: 2, Loss: 2.5702433586120605


Processing epoch 01:  52%|█████▏    | 2533/4850 [12:06<11:02,  3.50it/s]

Epoch: 2, Loss: 2.7059898376464844


Processing epoch 01:  52%|█████▏    | 2534/4850 [12:07<10:56,  3.53it/s]

Epoch: 2, Loss: 3.4278712272644043


Processing epoch 01:  52%|█████▏    | 2535/4850 [12:07<10:59,  3.51it/s]

Epoch: 2, Loss: 2.264192581176758


Processing epoch 01:  52%|█████▏    | 2536/4850 [12:07<11:02,  3.49it/s]

Epoch: 2, Loss: 2.19179630279541


Processing epoch 01:  52%|█████▏    | 2537/4850 [12:07<11:00,  3.50it/s]

Epoch: 2, Loss: 2.2347354888916016


Processing epoch 01:  52%|█████▏    | 2538/4850 [12:08<11:01,  3.49it/s]

Epoch: 2, Loss: 2.6208934783935547


Processing epoch 01:  52%|█████▏    | 2539/4850 [12:08<10:59,  3.50it/s]

Epoch: 2, Loss: 2.4992823600769043


Processing epoch 01:  52%|█████▏    | 2540/4850 [12:08<10:56,  3.52it/s]

Epoch: 2, Loss: 2.2959024906158447


Processing epoch 01:  52%|█████▏    | 2541/4850 [12:09<10:56,  3.52it/s]

Epoch: 2, Loss: 2.7166922092437744


Processing epoch 01:  52%|█████▏    | 2542/4850 [12:09<10:56,  3.52it/s]

Epoch: 2, Loss: 2.3686251640319824


Processing epoch 01:  52%|█████▏    | 2543/4850 [12:09<11:07,  3.46it/s]

Epoch: 2, Loss: 2.2309186458587646


Processing epoch 01:  52%|█████▏    | 2544/4850 [12:09<11:02,  3.48it/s]

Epoch: 2, Loss: 2.3951547145843506


Processing epoch 01:  52%|█████▏    | 2545/4850 [12:10<11:00,  3.49it/s]

Epoch: 2, Loss: 3.8522799015045166


Processing epoch 01:  52%|█████▏    | 2546/4850 [12:10<11:08,  3.44it/s]

Epoch: 2, Loss: 2.69783353805542


Processing epoch 01:  53%|█████▎    | 2547/4850 [12:10<11:12,  3.42it/s]

Epoch: 2, Loss: 2.7092981338500977


Processing epoch 01:  53%|█████▎    | 2548/4850 [12:11<11:08,  3.45it/s]

Epoch: 2, Loss: 2.7652649879455566


Processing epoch 01:  53%|█████▎    | 2549/4850 [12:11<11:07,  3.44it/s]

Epoch: 2, Loss: 2.007235050201416


Processing epoch 01:  53%|█████▎    | 2550/4850 [12:11<11:10,  3.43it/s]

Epoch: 2, Loss: 1.97715163230896


Processing epoch 01:  53%|█████▎    | 2551/4850 [12:12<11:17,  3.39it/s]

Epoch: 2, Loss: 2.7696428298950195


Processing epoch 01:  53%|█████▎    | 2552/4850 [12:12<11:10,  3.43it/s]

Epoch: 2, Loss: 3.142767906188965


Processing epoch 01:  53%|█████▎    | 2553/4850 [12:12<11:05,  3.45it/s]

Epoch: 2, Loss: 3.6060729026794434


Processing epoch 01:  53%|█████▎    | 2554/4850 [12:12<11:05,  3.45it/s]

Epoch: 2, Loss: 2.3864285945892334


Processing epoch 01:  53%|█████▎    | 2555/4850 [12:13<11:08,  3.43it/s]

Epoch: 2, Loss: 2.8572182655334473


Processing epoch 01:  53%|█████▎    | 2556/4850 [12:13<11:10,  3.42it/s]

Epoch: 2, Loss: 2.650130033493042


Processing epoch 01:  53%|█████▎    | 2557/4850 [12:13<11:10,  3.42it/s]

Epoch: 2, Loss: 2.658938407897949


Processing epoch 01:  53%|█████▎    | 2558/4850 [12:14<11:07,  3.43it/s]

Epoch: 2, Loss: 1.9436089992523193


Processing epoch 01:  53%|█████▎    | 2559/4850 [12:14<11:19,  3.37it/s]

Epoch: 2, Loss: 2.4194982051849365


Processing epoch 01:  53%|█████▎    | 2560/4850 [12:14<11:24,  3.34it/s]

Epoch: 2, Loss: 2.4878993034362793


Processing epoch 01:  53%|█████▎    | 2561/4850 [12:14<11:23,  3.35it/s]

Epoch: 2, Loss: 2.437312602996826


Processing epoch 01:  53%|█████▎    | 2562/4850 [12:15<11:15,  3.39it/s]

Epoch: 2, Loss: 2.8762834072113037


Processing epoch 01:  53%|█████▎    | 2563/4850 [12:15<11:09,  3.41it/s]

Epoch: 2, Loss: 2.589038372039795


Processing epoch 01:  53%|█████▎    | 2564/4850 [12:15<11:00,  3.46it/s]

Epoch: 2, Loss: 2.6474008560180664


Processing epoch 01:  53%|█████▎    | 2565/4850 [12:16<10:53,  3.50it/s]

Epoch: 2, Loss: 2.8627095222473145


Processing epoch 01:  53%|█████▎    | 2566/4850 [12:16<10:51,  3.51it/s]

Epoch: 2, Loss: 2.8021950721740723


Processing epoch 01:  53%|█████▎    | 2567/4850 [12:16<10:49,  3.51it/s]

Epoch: 2, Loss: 3.0636250972747803


Processing epoch 01:  53%|█████▎    | 2568/4850 [12:16<10:47,  3.53it/s]

Epoch: 2, Loss: 3.370861053466797


Processing epoch 01:  53%|█████▎    | 2569/4850 [12:17<10:47,  3.52it/s]

Epoch: 2, Loss: 2.072413682937622


Processing epoch 01:  53%|█████▎    | 2570/4850 [12:17<10:44,  3.54it/s]

Epoch: 2, Loss: 2.491849899291992


Processing epoch 01:  53%|█████▎    | 2571/4850 [12:17<10:42,  3.55it/s]

Epoch: 2, Loss: 3.3389339447021484


Processing epoch 01:  53%|█████▎    | 2572/4850 [12:18<10:53,  3.48it/s]

Epoch: 2, Loss: 2.750725507736206


Processing epoch 01:  53%|█████▎    | 2573/4850 [12:18<10:51,  3.49it/s]

Epoch: 2, Loss: 2.3526625633239746


Processing epoch 01:  53%|█████▎    | 2574/4850 [12:18<10:51,  3.49it/s]

Epoch: 2, Loss: 2.8270018100738525


Processing epoch 01:  53%|█████▎    | 2575/4850 [12:18<10:46,  3.52it/s]

Epoch: 2, Loss: 3.212340831756592


Processing epoch 01:  53%|█████▎    | 2576/4850 [12:19<10:46,  3.52it/s]

Epoch: 2, Loss: 2.48862624168396


Processing epoch 01:  53%|█████▎    | 2577/4850 [12:19<10:44,  3.53it/s]

Epoch: 2, Loss: 2.5056939125061035


Processing epoch 01:  53%|█████▎    | 2578/4850 [12:19<10:44,  3.53it/s]

Epoch: 2, Loss: 2.5282037258148193


Processing epoch 01:  53%|█████▎    | 2579/4850 [12:20<10:42,  3.53it/s]

Epoch: 2, Loss: 2.6236326694488525


Processing epoch 01:  53%|█████▎    | 2580/4850 [12:20<10:45,  3.52it/s]

Epoch: 2, Loss: 2.5549631118774414


Processing epoch 01:  53%|█████▎    | 2581/4850 [12:20<10:44,  3.52it/s]

Epoch: 2, Loss: 2.2055718898773193


Processing epoch 01:  53%|█████▎    | 2582/4850 [12:20<10:43,  3.53it/s]

Epoch: 2, Loss: 2.8192648887634277


Processing epoch 01:  53%|█████▎    | 2583/4850 [12:21<10:50,  3.48it/s]

Epoch: 2, Loss: 2.187244176864624


Processing epoch 01:  53%|█████▎    | 2584/4850 [12:21<10:42,  3.53it/s]

Epoch: 2, Loss: 2.643907308578491


Processing epoch 01:  53%|█████▎    | 2585/4850 [12:21<10:40,  3.53it/s]

Epoch: 2, Loss: 2.601564645767212


Processing epoch 01:  53%|█████▎    | 2586/4850 [12:22<10:40,  3.54it/s]

Epoch: 2, Loss: 2.536203384399414


Processing epoch 01:  53%|█████▎    | 2587/4850 [12:22<10:43,  3.52it/s]

Epoch: 2, Loss: 2.41910982131958


Processing epoch 01:  53%|█████▎    | 2588/4850 [12:22<10:40,  3.53it/s]

Epoch: 2, Loss: 2.2936737537384033


Processing epoch 01:  53%|█████▎    | 2589/4850 [12:22<10:40,  3.53it/s]

Epoch: 2, Loss: 2.265218496322632


Processing epoch 01:  53%|█████▎    | 2590/4850 [12:23<10:41,  3.52it/s]

Epoch: 2, Loss: 2.6929285526275635


Processing epoch 01:  53%|█████▎    | 2591/4850 [12:23<10:41,  3.52it/s]

Epoch: 2, Loss: 2.3416454792022705


Processing epoch 01:  53%|█████▎    | 2592/4850 [12:23<10:39,  3.53it/s]

Epoch: 2, Loss: 2.476469039916992


Processing epoch 01:  53%|█████▎    | 2593/4850 [12:24<10:42,  3.51it/s]

Epoch: 2, Loss: 2.2464942932128906


Processing epoch 01:  53%|█████▎    | 2594/4850 [12:24<10:43,  3.51it/s]

Epoch: 2, Loss: 2.533403158187866


Processing epoch 01:  54%|█████▎    | 2595/4850 [12:24<10:39,  3.52it/s]

Epoch: 2, Loss: 2.200331449508667


Processing epoch 01:  54%|█████▎    | 2596/4850 [12:24<10:46,  3.49it/s]

Epoch: 2, Loss: 2.5714023113250732


Processing epoch 01:  54%|█████▎    | 2597/4850 [12:25<10:44,  3.50it/s]

Epoch: 2, Loss: 2.5997819900512695


Processing epoch 01:  54%|█████▎    | 2598/4850 [12:25<10:43,  3.50it/s]

Epoch: 2, Loss: 2.273892641067505


Processing epoch 01:  54%|█████▎    | 2599/4850 [12:25<10:40,  3.52it/s]

Epoch: 2, Loss: 2.518810987472534


Processing epoch 01:  54%|█████▎    | 2600/4850 [12:26<10:38,  3.52it/s]

Epoch: 2, Loss: 2.8264684677124023


Processing epoch 01:  54%|█████▎    | 2601/4850 [12:26<10:40,  3.51it/s]

Epoch: 2, Loss: 2.507042169570923


Processing epoch 01:  54%|█████▎    | 2602/4850 [12:26<10:35,  3.54it/s]

Epoch: 2, Loss: 2.713892936706543


Processing epoch 01:  54%|█████▎    | 2603/4850 [12:26<10:32,  3.55it/s]

Epoch: 2, Loss: 2.2648837566375732


Processing epoch 01:  54%|█████▎    | 2604/4850 [12:27<10:33,  3.55it/s]

Epoch: 2, Loss: 2.31758451461792


Processing epoch 01:  54%|█████▎    | 2605/4850 [12:27<10:55,  3.43it/s]

Epoch: 2, Loss: 2.3918838500976562


Processing epoch 01:  54%|█████▎    | 2606/4850 [12:27<10:58,  3.41it/s]

Epoch: 2, Loss: 2.662914276123047


Processing epoch 01:  54%|█████▍    | 2607/4850 [12:28<10:55,  3.42it/s]

Epoch: 2, Loss: 3.3723583221435547


Processing epoch 01:  54%|█████▍    | 2608/4850 [12:28<10:55,  3.42it/s]

Epoch: 2, Loss: 2.8134803771972656


Processing epoch 01:  54%|█████▍    | 2609/4850 [12:28<11:05,  3.37it/s]

Epoch: 2, Loss: 2.421154499053955


Processing epoch 01:  54%|█████▍    | 2610/4850 [12:28<11:02,  3.38it/s]

Epoch: 2, Loss: 2.8147690296173096


Processing epoch 01:  54%|█████▍    | 2611/4850 [12:29<10:55,  3.41it/s]

Epoch: 2, Loss: 2.5449302196502686


Processing epoch 01:  54%|█████▍    | 2612/4850 [12:29<10:54,  3.42it/s]

Epoch: 2, Loss: 2.823857307434082


Processing epoch 01:  54%|█████▍    | 2613/4850 [12:29<11:06,  3.36it/s]

Epoch: 2, Loss: 2.1151254177093506


Processing epoch 01:  54%|█████▍    | 2614/4850 [12:30<10:57,  3.40it/s]

Epoch: 2, Loss: 2.8987417221069336


Processing epoch 01:  54%|█████▍    | 2615/4850 [12:30<10:49,  3.44it/s]

Epoch: 2, Loss: 3.16094970703125


Processing epoch 01:  54%|█████▍    | 2616/4850 [12:30<10:47,  3.45it/s]

Epoch: 2, Loss: 2.350083112716675


Processing epoch 01:  54%|█████▍    | 2617/4850 [12:31<10:42,  3.47it/s]

Epoch: 2, Loss: 2.673542022705078


Processing epoch 01:  54%|█████▍    | 2618/4850 [12:31<10:40,  3.49it/s]

Epoch: 2, Loss: 2.2335169315338135


Processing epoch 01:  54%|█████▍    | 2619/4850 [12:31<10:41,  3.48it/s]

Epoch: 2, Loss: 2.5698115825653076


Processing epoch 01:  54%|█████▍    | 2620/4850 [12:31<10:37,  3.50it/s]

Epoch: 2, Loss: 3.4393181800842285


Processing epoch 01:  54%|█████▍    | 2621/4850 [12:32<10:38,  3.49it/s]

Epoch: 2, Loss: 2.378431797027588


Processing epoch 01:  54%|█████▍    | 2622/4850 [12:32<10:38,  3.49it/s]

Epoch: 2, Loss: 2.100038766860962


Processing epoch 01:  54%|█████▍    | 2623/4850 [12:32<10:48,  3.43it/s]

Epoch: 2, Loss: 2.087001085281372


Processing epoch 01:  54%|█████▍    | 2624/4850 [12:33<10:44,  3.45it/s]

Epoch: 2, Loss: 2.5535800457000732


Processing epoch 01:  54%|█████▍    | 2625/4850 [12:33<10:39,  3.48it/s]

Epoch: 2, Loss: 2.1886165142059326


Processing epoch 01:  54%|█████▍    | 2626/4850 [12:33<10:37,  3.49it/s]

Epoch: 2, Loss: 2.115713119506836


Processing epoch 01:  54%|█████▍    | 2627/4850 [12:33<10:36,  3.49it/s]

Epoch: 2, Loss: 3.4101696014404297


Processing epoch 01:  54%|█████▍    | 2628/4850 [12:34<10:34,  3.50it/s]

Epoch: 2, Loss: 2.4352715015411377


Processing epoch 01:  54%|█████▍    | 2629/4850 [12:34<10:34,  3.50it/s]

Epoch: 2, Loss: 2.6923961639404297


Processing epoch 01:  54%|█████▍    | 2630/4850 [12:34<10:33,  3.50it/s]

Epoch: 2, Loss: 2.227774143218994


Processing epoch 01:  54%|█████▍    | 2631/4850 [12:35<10:30,  3.52it/s]

Epoch: 2, Loss: 2.3708643913269043


Processing epoch 01:  54%|█████▍    | 2632/4850 [12:35<10:30,  3.52it/s]

Epoch: 2, Loss: 2.1472463607788086


Processing epoch 01:  54%|█████▍    | 2633/4850 [12:35<10:29,  3.52it/s]

Epoch: 2, Loss: 2.9021785259246826


Processing epoch 01:  54%|█████▍    | 2634/4850 [12:35<10:37,  3.48it/s]

Epoch: 2, Loss: 2.4093968868255615


Processing epoch 01:  54%|█████▍    | 2635/4850 [12:36<10:33,  3.50it/s]

Epoch: 2, Loss: 2.583841323852539


Processing epoch 01:  54%|█████▍    | 2636/4850 [12:36<10:35,  3.48it/s]

Epoch: 2, Loss: 3.213371753692627


Processing epoch 01:  54%|█████▍    | 2637/4850 [12:36<10:39,  3.46it/s]

Epoch: 2, Loss: 2.5155861377716064


Processing epoch 01:  54%|█████▍    | 2638/4850 [12:37<10:34,  3.49it/s]

Epoch: 2, Loss: 2.9099245071411133


Processing epoch 01:  54%|█████▍    | 2639/4850 [12:37<10:32,  3.50it/s]

Epoch: 2, Loss: 2.528524398803711


Processing epoch 01:  54%|█████▍    | 2640/4850 [12:37<10:29,  3.51it/s]

Epoch: 2, Loss: 2.5963311195373535


Processing epoch 01:  54%|█████▍    | 2641/4850 [12:37<10:26,  3.52it/s]

Epoch: 2, Loss: 3.335218906402588


Processing epoch 01:  54%|█████▍    | 2642/4850 [12:38<10:25,  3.53it/s]

Epoch: 2, Loss: 2.7715368270874023


Processing epoch 01:  54%|█████▍    | 2643/4850 [12:38<10:24,  3.53it/s]

Epoch: 2, Loss: 2.471005916595459


Processing epoch 01:  55%|█████▍    | 2644/4850 [12:38<10:27,  3.52it/s]

Epoch: 2, Loss: 2.6670961380004883


Processing epoch 01:  55%|█████▍    | 2645/4850 [12:39<10:29,  3.50it/s]

Epoch: 2, Loss: 2.419609546661377


Processing epoch 01:  55%|█████▍    | 2646/4850 [12:39<10:26,  3.52it/s]

Epoch: 2, Loss: 3.740356922149658


Processing epoch 01:  55%|█████▍    | 2647/4850 [12:39<10:22,  3.54it/s]

Epoch: 2, Loss: 2.4187421798706055


Processing epoch 01:  55%|█████▍    | 2648/4850 [12:39<10:23,  3.53it/s]

Epoch: 2, Loss: 2.457166910171509


Processing epoch 01:  55%|█████▍    | 2649/4850 [12:40<10:21,  3.54it/s]

Epoch: 2, Loss: 2.3943371772766113


Processing epoch 01:  55%|█████▍    | 2650/4850 [12:40<10:33,  3.47it/s]

Epoch: 2, Loss: 2.334559440612793


Processing epoch 01:  55%|█████▍    | 2651/4850 [12:40<10:45,  3.41it/s]

Epoch: 2, Loss: 2.112583637237549


Processing epoch 01:  55%|█████▍    | 2652/4850 [12:41<10:40,  3.43it/s]

Epoch: 2, Loss: 2.354336738586426


Processing epoch 01:  55%|█████▍    | 2653/4850 [12:41<10:35,  3.46it/s]

Epoch: 2, Loss: 2.7007951736450195


Processing epoch 01:  55%|█████▍    | 2654/4850 [12:41<10:31,  3.48it/s]

Epoch: 2, Loss: 2.0215773582458496


Processing epoch 01:  55%|█████▍    | 2655/4850 [12:41<10:32,  3.47it/s]

Epoch: 2, Loss: 2.3001821041107178


Processing epoch 01:  55%|█████▍    | 2656/4850 [12:42<10:41,  3.42it/s]

Epoch: 2, Loss: 2.309711456298828


Processing epoch 01:  55%|█████▍    | 2657/4850 [12:42<10:37,  3.44it/s]

Epoch: 2, Loss: 2.634552001953125


Processing epoch 01:  55%|█████▍    | 2658/4850 [12:42<10:46,  3.39it/s]

Epoch: 2, Loss: 2.6698451042175293


Processing epoch 01:  55%|█████▍    | 2659/4850 [12:43<10:47,  3.38it/s]

Epoch: 2, Loss: 2.23581600189209


Processing epoch 01:  55%|█████▍    | 2660/4850 [12:43<10:54,  3.35it/s]

Epoch: 2, Loss: 2.8728859424591064


Processing epoch 01:  55%|█████▍    | 2661/4850 [12:43<10:54,  3.34it/s]

Epoch: 2, Loss: 2.9066524505615234


Processing epoch 01:  55%|█████▍    | 2662/4850 [12:43<10:47,  3.38it/s]

Epoch: 2, Loss: 2.273580551147461


Processing epoch 01:  55%|█████▍    | 2663/4850 [12:44<10:44,  3.39it/s]

Epoch: 2, Loss: 2.5817012786865234


Processing epoch 01:  55%|█████▍    | 2664/4850 [12:44<10:48,  3.37it/s]

Epoch: 2, Loss: 2.4896621704101562


Processing epoch 01:  55%|█████▍    | 2665/4850 [12:44<10:55,  3.33it/s]

Epoch: 2, Loss: 3.150470733642578


Processing epoch 01:  55%|█████▍    | 2666/4850 [12:45<10:49,  3.36it/s]

Epoch: 2, Loss: 2.3536887168884277


Processing epoch 01:  55%|█████▍    | 2667/4850 [12:45<10:40,  3.41it/s]

Epoch: 2, Loss: 2.3993091583251953


Processing epoch 01:  55%|█████▌    | 2668/4850 [12:45<10:33,  3.44it/s]

Epoch: 2, Loss: 2.325770378112793


Processing epoch 01:  55%|█████▌    | 2669/4850 [12:46<10:37,  3.42it/s]

Epoch: 2, Loss: 2.4711222648620605


Processing epoch 01:  55%|█████▌    | 2670/4850 [12:46<10:28,  3.47it/s]

Epoch: 2, Loss: 3.20149302482605


Processing epoch 01:  55%|█████▌    | 2671/4850 [12:46<10:25,  3.49it/s]

Epoch: 2, Loss: 2.6535146236419678


Processing epoch 01:  55%|█████▌    | 2672/4850 [12:46<10:21,  3.50it/s]

Epoch: 2, Loss: 2.4741780757904053


Processing epoch 01:  55%|█████▌    | 2673/4850 [12:47<10:18,  3.52it/s]

Epoch: 2, Loss: 3.273514747619629


Processing epoch 01:  55%|█████▌    | 2674/4850 [12:47<10:28,  3.46it/s]

Epoch: 2, Loss: 2.372845411300659


Processing epoch 01:  55%|█████▌    | 2675/4850 [12:47<10:23,  3.49it/s]

Epoch: 2, Loss: 2.514373779296875


Processing epoch 01:  55%|█████▌    | 2676/4850 [12:48<10:23,  3.49it/s]

Epoch: 2, Loss: 2.5278100967407227


Processing epoch 01:  55%|█████▌    | 2677/4850 [12:48<10:21,  3.49it/s]

Epoch: 2, Loss: 2.6177568435668945


Processing epoch 01:  55%|█████▌    | 2678/4850 [12:48<10:18,  3.51it/s]

Epoch: 2, Loss: 2.428305149078369


Processing epoch 01:  55%|█████▌    | 2679/4850 [12:48<10:18,  3.51it/s]

Epoch: 2, Loss: 2.207209348678589


Processing epoch 01:  55%|█████▌    | 2680/4850 [12:49<10:18,  3.51it/s]

Epoch: 2, Loss: 1.7830214500427246


Processing epoch 01:  55%|█████▌    | 2681/4850 [12:49<10:16,  3.52it/s]

Epoch: 2, Loss: 2.26790714263916


Processing epoch 01:  55%|█████▌    | 2682/4850 [12:49<10:14,  3.53it/s]

Epoch: 2, Loss: 3.536268711090088


Processing epoch 01:  55%|█████▌    | 2683/4850 [12:50<10:16,  3.51it/s]

Epoch: 2, Loss: 2.6398630142211914


Processing epoch 01:  55%|█████▌    | 2684/4850 [12:50<10:13,  3.53it/s]

Epoch: 2, Loss: 2.457414150238037


Processing epoch 01:  55%|█████▌    | 2685/4850 [12:50<10:17,  3.50it/s]

Epoch: 2, Loss: 3.086153984069824


Processing epoch 01:  55%|█████▌    | 2686/4850 [12:50<10:16,  3.51it/s]

Epoch: 2, Loss: 2.242898941040039


Processing epoch 01:  55%|█████▌    | 2687/4850 [12:51<10:11,  3.54it/s]

Epoch: 2, Loss: 3.1723313331604004


Processing epoch 01:  55%|█████▌    | 2688/4850 [12:51<10:14,  3.52it/s]

Epoch: 2, Loss: 2.2660961151123047


Processing epoch 01:  55%|█████▌    | 2689/4850 [12:51<10:12,  3.53it/s]

Epoch: 2, Loss: 3.3153018951416016


Processing epoch 01:  55%|█████▌    | 2690/4850 [12:51<10:11,  3.53it/s]

Epoch: 2, Loss: 2.444046974182129


Processing epoch 01:  55%|█████▌    | 2691/4850 [12:52<10:14,  3.52it/s]

Epoch: 2, Loss: 2.3455028533935547


Processing epoch 01:  56%|█████▌    | 2692/4850 [12:52<10:11,  3.53it/s]

Epoch: 2, Loss: 2.9993481636047363


Processing epoch 01:  56%|█████▌    | 2693/4850 [12:52<10:10,  3.53it/s]

Epoch: 2, Loss: 2.5246353149414062


Processing epoch 01:  56%|█████▌    | 2694/4850 [12:53<10:10,  3.53it/s]

Epoch: 2, Loss: 2.3752708435058594


Processing epoch 01:  56%|█████▌    | 2695/4850 [12:53<10:10,  3.53it/s]

Epoch: 2, Loss: 2.682314872741699


Processing epoch 01:  56%|█████▌    | 2696/4850 [12:53<10:12,  3.52it/s]

Epoch: 2, Loss: 3.0235540866851807


Processing epoch 01:  56%|█████▌    | 2697/4850 [12:53<10:13,  3.51it/s]

Epoch: 2, Loss: 2.033829927444458


Processing epoch 01:  56%|█████▌    | 2698/4850 [12:54<10:09,  3.53it/s]

Epoch: 2, Loss: 2.6842308044433594


Processing epoch 01:  56%|█████▌    | 2699/4850 [12:54<10:11,  3.52it/s]

Epoch: 2, Loss: 2.498965263366699


Processing epoch 01:  56%|█████▌    | 2700/4850 [12:54<10:08,  3.53it/s]

Epoch: 2, Loss: 2.375805139541626


Processing epoch 01:  56%|█████▌    | 2701/4850 [12:55<10:15,  3.49it/s]

Epoch: 2, Loss: 2.2362310886383057


Processing epoch 01:  56%|█████▌    | 2702/4850 [12:55<10:35,  3.38it/s]

Epoch: 2, Loss: 2.5321388244628906


Processing epoch 01:  56%|█████▌    | 2703/4850 [12:55<10:35,  3.38it/s]

Epoch: 2, Loss: 2.544748306274414


Processing epoch 01:  56%|█████▌    | 2704/4850 [12:56<10:37,  3.37it/s]

Epoch: 2, Loss: 2.2376580238342285


Processing epoch 01:  56%|█████▌    | 2705/4850 [12:56<10:43,  3.34it/s]

Epoch: 2, Loss: 3.2903261184692383


Processing epoch 01:  56%|█████▌    | 2706/4850 [12:56<10:28,  3.41it/s]

Epoch: 2, Loss: 3.569581985473633


Processing epoch 01:  56%|█████▌    | 2707/4850 [12:56<10:20,  3.45it/s]

Epoch: 2, Loss: 2.4197545051574707


Processing epoch 01:  56%|█████▌    | 2708/4850 [12:57<10:13,  3.49it/s]

Epoch: 2, Loss: 2.7499146461486816


Processing epoch 01:  56%|█████▌    | 2709/4850 [12:57<10:18,  3.46it/s]

Epoch: 2, Loss: 2.5970590114593506


Processing epoch 01:  56%|█████▌    | 2710/4850 [12:57<10:21,  3.44it/s]

Epoch: 2, Loss: 1.8774950504302979


Processing epoch 01:  56%|█████▌    | 2711/4850 [12:58<10:27,  3.41it/s]

Epoch: 2, Loss: 2.8055992126464844


Processing epoch 01:  56%|█████▌    | 2712/4850 [12:58<10:40,  3.34it/s]

Epoch: 2, Loss: 2.4123077392578125


Processing epoch 01:  56%|█████▌    | 2713/4850 [12:58<10:36,  3.36it/s]

Epoch: 2, Loss: 2.2085556983947754


Processing epoch 01:  56%|█████▌    | 2714/4850 [12:58<10:34,  3.36it/s]

Epoch: 2, Loss: 3.0845673084259033


Processing epoch 01:  56%|█████▌    | 2715/4850 [12:59<10:25,  3.41it/s]

Epoch: 2, Loss: 2.628993272781372


Processing epoch 01:  56%|█████▌    | 2716/4850 [12:59<10:34,  3.36it/s]

Epoch: 2, Loss: 2.7479500770568848


Processing epoch 01:  56%|█████▌    | 2717/4850 [12:59<10:38,  3.34it/s]

Epoch: 2, Loss: 2.8243467807769775


Processing epoch 01:  56%|█████▌    | 2718/4850 [13:00<10:28,  3.39it/s]

Epoch: 2, Loss: 2.5256409645080566


Processing epoch 01:  56%|█████▌    | 2719/4850 [13:00<10:21,  3.43it/s]

Epoch: 2, Loss: 2.6293647289276123


Processing epoch 01:  56%|█████▌    | 2720/4850 [13:00<10:16,  3.45it/s]

Epoch: 2, Loss: 1.8015280961990356


Processing epoch 01:  56%|█████▌    | 2721/4850 [13:01<10:16,  3.45it/s]

Epoch: 2, Loss: 2.474468469619751


Processing epoch 01:  56%|█████▌    | 2722/4850 [13:01<10:12,  3.47it/s]

Epoch: 2, Loss: 2.1684277057647705


Processing epoch 01:  56%|█████▌    | 2723/4850 [13:01<10:07,  3.50it/s]

Epoch: 2, Loss: 2.2611806392669678


Processing epoch 01:  56%|█████▌    | 2724/4850 [13:01<10:08,  3.50it/s]

Epoch: 2, Loss: 2.4288477897644043


Processing epoch 01:  56%|█████▌    | 2725/4850 [13:02<10:04,  3.51it/s]

Epoch: 2, Loss: 2.540250778198242


Processing epoch 01:  56%|█████▌    | 2726/4850 [13:02<10:03,  3.52it/s]

Epoch: 2, Loss: 2.5618715286254883


Processing epoch 01:  56%|█████▌    | 2727/4850 [13:02<10:01,  3.53it/s]

Epoch: 2, Loss: 3.0566368103027344


Processing epoch 01:  56%|█████▌    | 2728/4850 [13:02<09:59,  3.54it/s]

Epoch: 2, Loss: 2.525383949279785


Processing epoch 01:  56%|█████▋    | 2729/4850 [13:03<10:00,  3.53it/s]

Epoch: 2, Loss: 2.423490524291992


Processing epoch 01:  56%|█████▋    | 2730/4850 [13:03<10:00,  3.53it/s]

Epoch: 2, Loss: 2.893148899078369


Processing epoch 01:  56%|█████▋    | 2731/4850 [13:03<10:00,  3.53it/s]

Epoch: 2, Loss: 4.258056640625


Processing epoch 01:  56%|█████▋    | 2732/4850 [13:04<10:06,  3.49it/s]

Epoch: 2, Loss: 2.84658145904541


Processing epoch 01:  56%|█████▋    | 2733/4850 [13:04<10:02,  3.51it/s]

Epoch: 2, Loss: 2.5428595542907715


Processing epoch 01:  56%|█████▋    | 2734/4850 [13:04<10:01,  3.52it/s]

Epoch: 2, Loss: 3.0719387531280518


Processing epoch 01:  56%|█████▋    | 2735/4850 [13:04<10:06,  3.49it/s]

Epoch: 2, Loss: 2.719114303588867


Processing epoch 01:  56%|█████▋    | 2736/4850 [13:05<10:08,  3.47it/s]

Epoch: 2, Loss: 2.187425136566162


Processing epoch 01:  56%|█████▋    | 2737/4850 [13:05<10:02,  3.51it/s]

Epoch: 2, Loss: 2.6373016834259033


Processing epoch 01:  56%|█████▋    | 2738/4850 [13:05<10:02,  3.50it/s]

Epoch: 2, Loss: 2.3392953872680664


Processing epoch 01:  56%|█████▋    | 2739/4850 [13:06<09:57,  3.54it/s]

Epoch: 2, Loss: 2.832346200942993


Processing epoch 01:  56%|█████▋    | 2740/4850 [13:06<09:56,  3.54it/s]

Epoch: 2, Loss: 2.399364948272705


Processing epoch 01:  57%|█████▋    | 2741/4850 [13:06<09:57,  3.53it/s]

Epoch: 2, Loss: 2.7839195728302


Processing epoch 01:  57%|█████▋    | 2742/4850 [13:06<09:56,  3.53it/s]

Epoch: 2, Loss: 2.186783790588379


Processing epoch 01:  57%|█████▋    | 2743/4850 [13:07<10:02,  3.50it/s]

Epoch: 2, Loss: 2.3415346145629883


Processing epoch 01:  57%|█████▋    | 2744/4850 [13:07<09:58,  3.52it/s]

Epoch: 2, Loss: 2.7190942764282227


Processing epoch 01:  57%|█████▋    | 2745/4850 [13:07<09:58,  3.52it/s]

Epoch: 2, Loss: 2.511226177215576


Processing epoch 01:  57%|█████▋    | 2746/4850 [13:08<09:57,  3.52it/s]

Epoch: 2, Loss: 2.447503089904785


Processing epoch 01:  57%|█████▋    | 2747/4850 [13:08<09:56,  3.53it/s]

Epoch: 2, Loss: 2.569916248321533


Processing epoch 01:  57%|█████▋    | 2748/4850 [13:08<09:55,  3.53it/s]

Epoch: 2, Loss: 2.7206859588623047


Processing epoch 01:  57%|█████▋    | 2749/4850 [13:08<09:56,  3.52it/s]

Epoch: 2, Loss: 2.846372604370117


Processing epoch 01:  57%|█████▋    | 2750/4850 [13:09<10:00,  3.50it/s]

Epoch: 2, Loss: 2.66701078414917


Processing epoch 01:  57%|█████▋    | 2751/4850 [13:09<09:59,  3.50it/s]

Epoch: 2, Loss: 2.741872787475586


Processing epoch 01:  57%|█████▋    | 2752/4850 [13:09<09:58,  3.50it/s]

Epoch: 2, Loss: 2.4650683403015137


Processing epoch 01:  57%|█████▋    | 2753/4850 [13:10<10:04,  3.47it/s]

Epoch: 2, Loss: 3.062845230102539


Processing epoch 01:  57%|█████▋    | 2754/4850 [13:10<10:22,  3.37it/s]

Epoch: 2, Loss: 2.6330528259277344


Processing epoch 01:  57%|█████▋    | 2755/4850 [13:10<10:27,  3.34it/s]

Epoch: 2, Loss: 3.2217891216278076


Processing epoch 01:  57%|█████▋    | 2756/4850 [13:11<10:21,  3.37it/s]

Epoch: 2, Loss: 2.921295166015625


Processing epoch 01:  57%|█████▋    | 2757/4850 [13:11<10:19,  3.38it/s]

Epoch: 2, Loss: 1.988324522972107


Processing epoch 01:  57%|█████▋    | 2758/4850 [13:11<10:16,  3.39it/s]

Epoch: 2, Loss: 2.1197683811187744


Processing epoch 01:  57%|█████▋    | 2759/4850 [13:11<10:11,  3.42it/s]

Epoch: 2, Loss: 1.943678379058838


Processing epoch 01:  57%|█████▋    | 2760/4850 [13:12<10:13,  3.41it/s]

Epoch: 2, Loss: 2.9422812461853027


Processing epoch 01:  57%|█████▋    | 2761/4850 [13:12<10:29,  3.32it/s]

Epoch: 2, Loss: 2.389652729034424


Processing epoch 01:  57%|█████▋    | 2762/4850 [13:12<10:32,  3.30it/s]

Epoch: 2, Loss: 2.4523708820343018


Processing epoch 01:  57%|█████▋    | 2763/4850 [13:13<10:26,  3.33it/s]

Epoch: 2, Loss: 3.555518627166748


Processing epoch 01:  57%|█████▋    | 2764/4850 [13:13<10:15,  3.39it/s]

Epoch: 2, Loss: 2.2924795150756836


Processing epoch 01:  57%|█████▋    | 2765/4850 [13:13<10:16,  3.38it/s]

Epoch: 2, Loss: 2.360147476196289


Processing epoch 01:  57%|█████▋    | 2766/4850 [13:13<10:11,  3.41it/s]

Epoch: 2, Loss: 2.60194730758667


Processing epoch 01:  57%|█████▋    | 2767/4850 [13:14<10:13,  3.39it/s]

Epoch: 2, Loss: 3.3750181198120117


Processing epoch 01:  57%|█████▋    | 2768/4850 [13:14<10:18,  3.36it/s]

Epoch: 2, Loss: 2.4664950370788574


Processing epoch 01:  57%|█████▋    | 2769/4850 [13:14<10:28,  3.31it/s]

Epoch: 2, Loss: 2.289249897003174


Processing epoch 01:  57%|█████▋    | 2770/4850 [13:15<10:18,  3.36it/s]

Epoch: 2, Loss: 2.5567786693573


Processing epoch 01:  57%|█████▋    | 2771/4850 [13:15<10:11,  3.40it/s]

Epoch: 2, Loss: 2.2254536151885986


Processing epoch 01:  57%|█████▋    | 2772/4850 [13:15<10:04,  3.44it/s]

Epoch: 2, Loss: 2.348381519317627


Processing epoch 01:  57%|█████▋    | 2773/4850 [13:16<09:58,  3.47it/s]

Epoch: 2, Loss: 2.817199230194092


Processing epoch 01:  57%|█████▋    | 2774/4850 [13:16<09:54,  3.49it/s]

Epoch: 2, Loss: 2.4860010147094727


Processing epoch 01:  57%|█████▋    | 2775/4850 [13:16<09:52,  3.50it/s]

Epoch: 2, Loss: 3.174459457397461


Processing epoch 01:  57%|█████▋    | 2776/4850 [13:16<09:52,  3.50it/s]

Epoch: 2, Loss: 2.630557060241699


Processing epoch 01:  57%|█████▋    | 2777/4850 [13:17<09:49,  3.51it/s]

Epoch: 2, Loss: 2.8770813941955566


Processing epoch 01:  57%|█████▋    | 2778/4850 [13:17<09:47,  3.53it/s]

Epoch: 2, Loss: 2.589857339859009


Processing epoch 01:  57%|█████▋    | 2779/4850 [13:17<09:54,  3.49it/s]

Epoch: 2, Loss: 2.1661877632141113


Processing epoch 01:  57%|█████▋    | 2780/4850 [13:18<09:49,  3.51it/s]

Epoch: 2, Loss: 3.1264290809631348


Processing epoch 01:  57%|█████▋    | 2781/4850 [13:18<09:47,  3.52it/s]

Epoch: 2, Loss: 2.055485486984253


Processing epoch 01:  57%|█████▋    | 2782/4850 [13:18<09:46,  3.53it/s]

Epoch: 2, Loss: 2.4719648361206055


Processing epoch 01:  57%|█████▋    | 2783/4850 [13:18<09:45,  3.53it/s]

Epoch: 2, Loss: 2.456435203552246


Processing epoch 01:  57%|█████▋    | 2784/4850 [13:19<09:51,  3.49it/s]

Epoch: 2, Loss: 2.4144606590270996


Processing epoch 01:  57%|█████▋    | 2785/4850 [13:19<09:50,  3.50it/s]

Epoch: 2, Loss: 2.428015947341919


Processing epoch 01:  57%|█████▋    | 2786/4850 [13:19<09:47,  3.51it/s]

Epoch: 2, Loss: 2.432126045227051


Processing epoch 01:  57%|█████▋    | 2787/4850 [13:20<09:44,  3.53it/s]

Epoch: 2, Loss: 2.072453260421753


Processing epoch 01:  57%|█████▋    | 2788/4850 [13:20<09:44,  3.53it/s]

Epoch: 2, Loss: 2.881972312927246


Processing epoch 01:  58%|█████▊    | 2789/4850 [13:20<09:42,  3.54it/s]

Epoch: 2, Loss: 2.0414137840270996


Processing epoch 01:  58%|█████▊    | 2790/4850 [13:20<09:47,  3.51it/s]

Epoch: 2, Loss: 2.386662244796753


Processing epoch 01:  58%|█████▊    | 2791/4850 [13:21<09:43,  3.53it/s]

Epoch: 2, Loss: 2.3758482933044434


Processing epoch 01:  58%|█████▊    | 2792/4850 [13:21<09:40,  3.55it/s]

Epoch: 2, Loss: 3.298664093017578


Processing epoch 01:  58%|█████▊    | 2793/4850 [13:21<09:41,  3.54it/s]

Epoch: 2, Loss: 2.3661954402923584


Processing epoch 01:  58%|█████▊    | 2794/4850 [13:22<09:41,  3.54it/s]

Epoch: 2, Loss: 2.391127109527588


Processing epoch 01:  58%|█████▊    | 2795/4850 [13:22<09:41,  3.53it/s]

Epoch: 2, Loss: 2.4404096603393555


Processing epoch 01:  58%|█████▊    | 2796/4850 [13:22<09:40,  3.54it/s]

Epoch: 2, Loss: 2.8143279552459717


Processing epoch 01:  58%|█████▊    | 2797/4850 [13:22<09:39,  3.54it/s]

Epoch: 2, Loss: 2.477602243423462


Processing epoch 01:  58%|█████▊    | 2798/4850 [13:23<09:38,  3.55it/s]

Epoch: 2, Loss: 2.735854387283325


Processing epoch 01:  58%|█████▊    | 2799/4850 [13:23<09:43,  3.51it/s]

Epoch: 2, Loss: 2.72813081741333


Processing epoch 01:  58%|█████▊    | 2800/4850 [13:23<09:40,  3.53it/s]

Epoch: 2, Loss: 2.759395122528076


Processing epoch 01:  58%|█████▊    | 2801/4850 [13:23<09:47,  3.49it/s]

Epoch: 2, Loss: 2.222282648086548


Processing epoch 01:  58%|█████▊    | 2802/4850 [13:24<09:44,  3.50it/s]

Epoch: 2, Loss: 2.492731809616089


Processing epoch 01:  58%|█████▊    | 2803/4850 [13:24<09:41,  3.52it/s]

Epoch: 2, Loss: 2.6247692108154297


Processing epoch 01:  58%|█████▊    | 2804/4850 [13:24<09:40,  3.52it/s]

Epoch: 2, Loss: 1.9585704803466797


Processing epoch 01:  58%|█████▊    | 2805/4850 [13:25<09:46,  3.49it/s]

Epoch: 2, Loss: 2.3631792068481445


Processing epoch 01:  58%|█████▊    | 2806/4850 [13:25<10:11,  3.34it/s]

Epoch: 2, Loss: 2.517202377319336


Processing epoch 01:  58%|█████▊    | 2807/4850 [13:25<10:09,  3.35it/s]

Epoch: 2, Loss: 2.4897513389587402


Processing epoch 01:  58%|█████▊    | 2808/4850 [13:26<10:07,  3.36it/s]

Epoch: 2, Loss: 2.0565314292907715


Processing epoch 01:  58%|█████▊    | 2809/4850 [13:26<10:02,  3.39it/s]

Epoch: 2, Loss: 3.069314956665039


Processing epoch 01:  58%|█████▊    | 2810/4850 [13:26<10:00,  3.40it/s]

Epoch: 2, Loss: 2.318328857421875


Processing epoch 01:  58%|█████▊    | 2811/4850 [13:26<10:08,  3.35it/s]

Epoch: 2, Loss: 2.251380443572998


Processing epoch 01:  58%|█████▊    | 2812/4850 [13:27<10:12,  3.33it/s]

Epoch: 2, Loss: 2.436950922012329


Processing epoch 01:  58%|█████▊    | 2813/4850 [13:27<10:13,  3.32it/s]

Epoch: 2, Loss: 2.8899686336517334


Processing epoch 01:  58%|█████▊    | 2814/4850 [13:27<10:17,  3.30it/s]

Epoch: 2, Loss: 2.010532855987549


Processing epoch 01:  58%|█████▊    | 2815/4850 [13:28<10:15,  3.30it/s]

Epoch: 2, Loss: 2.3655991554260254


Processing epoch 01:  58%|█████▊    | 2816/4850 [13:28<10:13,  3.32it/s]

Epoch: 2, Loss: 2.758599281311035


Processing epoch 01:  58%|█████▊    | 2817/4850 [13:28<10:21,  3.27it/s]

Epoch: 2, Loss: 2.7567944526672363


Processing epoch 01:  58%|█████▊    | 2818/4850 [13:29<10:09,  3.34it/s]

Epoch: 2, Loss: 2.6357808113098145


Processing epoch 01:  58%|█████▊    | 2819/4850 [13:29<10:13,  3.31it/s]

Epoch: 2, Loss: 2.535989999771118


Processing epoch 01:  58%|█████▊    | 2820/4850 [13:29<10:12,  3.31it/s]

Epoch: 2, Loss: 2.1267638206481934


Processing epoch 01:  58%|█████▊    | 2821/4850 [13:29<10:01,  3.37it/s]

Epoch: 2, Loss: 2.5121946334838867


Processing epoch 01:  58%|█████▊    | 2822/4850 [13:30<09:51,  3.43it/s]

Epoch: 2, Loss: 2.472531318664551


Processing epoch 01:  58%|█████▊    | 2823/4850 [13:30<09:48,  3.45it/s]

Epoch: 2, Loss: 2.613304615020752


Processing epoch 01:  58%|█████▊    | 2824/4850 [13:30<09:43,  3.47it/s]

Epoch: 2, Loss: 2.3159866333007812


Processing epoch 01:  58%|█████▊    | 2825/4850 [13:31<09:39,  3.49it/s]

Epoch: 2, Loss: 2.349489688873291


Processing epoch 01:  58%|█████▊    | 2826/4850 [13:31<09:41,  3.48it/s]

Epoch: 2, Loss: 2.522003173828125


Processing epoch 01:  58%|█████▊    | 2827/4850 [13:31<09:41,  3.48it/s]

Epoch: 2, Loss: 2.3051400184631348


Processing epoch 01:  58%|█████▊    | 2828/4850 [13:31<09:37,  3.50it/s]

Epoch: 2, Loss: 2.9245047569274902


Processing epoch 01:  58%|█████▊    | 2829/4850 [13:32<09:39,  3.48it/s]

Epoch: 2, Loss: 2.6637349128723145


Processing epoch 01:  58%|█████▊    | 2830/4850 [13:32<09:38,  3.49it/s]

Epoch: 2, Loss: 2.473548412322998


Processing epoch 01:  58%|█████▊    | 2831/4850 [13:32<09:35,  3.51it/s]

Epoch: 2, Loss: 2.4250879287719727


Processing epoch 01:  58%|█████▊    | 2832/4850 [13:33<09:34,  3.51it/s]

Epoch: 2, Loss: 2.5008649826049805


Processing epoch 01:  58%|█████▊    | 2833/4850 [13:33<09:31,  3.53it/s]

Epoch: 2, Loss: 2.409213066101074


Processing epoch 01:  58%|█████▊    | 2834/4850 [13:33<09:31,  3.53it/s]

Epoch: 2, Loss: 2.8614726066589355


Processing epoch 01:  58%|█████▊    | 2835/4850 [13:33<09:29,  3.54it/s]

Epoch: 2, Loss: 3.169865369796753


Processing epoch 01:  58%|█████▊    | 2836/4850 [13:34<09:28,  3.54it/s]

Epoch: 2, Loss: 2.464601516723633


Processing epoch 01:  58%|█████▊    | 2837/4850 [13:34<09:30,  3.53it/s]

Epoch: 2, Loss: 2.389425754547119


Processing epoch 01:  59%|█████▊    | 2838/4850 [13:34<09:31,  3.52it/s]

Epoch: 2, Loss: 2.41709303855896


Processing epoch 01:  59%|█████▊    | 2839/4850 [13:35<09:32,  3.51it/s]

Epoch: 2, Loss: 2.3319735527038574


Processing epoch 01:  59%|█████▊    | 2840/4850 [13:35<09:32,  3.51it/s]

Epoch: 2, Loss: 2.832782745361328


Processing epoch 01:  59%|█████▊    | 2841/4850 [13:35<09:33,  3.50it/s]

Epoch: 2, Loss: 2.5784859657287598


Processing epoch 01:  59%|█████▊    | 2842/4850 [13:35<09:29,  3.52it/s]

Epoch: 2, Loss: 2.370897054672241


Processing epoch 01:  59%|█████▊    | 2843/4850 [13:36<09:29,  3.52it/s]

Epoch: 2, Loss: 2.8461527824401855


Processing epoch 01:  59%|█████▊    | 2844/4850 [13:36<09:28,  3.53it/s]

Epoch: 2, Loss: 2.333441734313965


Processing epoch 01:  59%|█████▊    | 2845/4850 [13:36<09:27,  3.53it/s]

Epoch: 2, Loss: 2.010582447052002


Processing epoch 01:  59%|█████▊    | 2846/4850 [13:37<09:27,  3.53it/s]

Epoch: 2, Loss: 2.917600631713867


Processing epoch 01:  59%|█████▊    | 2847/4850 [13:37<09:28,  3.52it/s]

Epoch: 2, Loss: 2.764878749847412


Processing epoch 01:  59%|█████▊    | 2848/4850 [13:37<09:37,  3.46it/s]

Epoch: 2, Loss: 3.20158314704895


Processing epoch 01:  59%|█████▊    | 2849/4850 [13:37<09:33,  3.49it/s]

Epoch: 2, Loss: 2.77919340133667


Processing epoch 01:  59%|█████▉    | 2850/4850 [13:38<09:31,  3.50it/s]

Epoch: 2, Loss: 2.502638816833496


Processing epoch 01:  59%|█████▉    | 2851/4850 [13:38<09:30,  3.51it/s]

Epoch: 2, Loss: 2.9348528385162354


Processing epoch 01:  59%|█████▉    | 2852/4850 [13:38<09:27,  3.52it/s]

Epoch: 2, Loss: 2.647653579711914


Processing epoch 01:  59%|█████▉    | 2853/4850 [13:39<09:25,  3.53it/s]

Epoch: 2, Loss: 2.4703500270843506


Processing epoch 01:  59%|█████▉    | 2854/4850 [13:39<09:22,  3.55it/s]

Epoch: 2, Loss: 3.5270938873291016


Processing epoch 01:  59%|█████▉    | 2855/4850 [13:39<09:24,  3.53it/s]

Epoch: 2, Loss: 2.2698445320129395


Processing epoch 01:  59%|█████▉    | 2856/4850 [13:39<09:25,  3.52it/s]

Epoch: 2, Loss: 2.6349711418151855


Processing epoch 01:  59%|█████▉    | 2857/4850 [13:40<09:25,  3.53it/s]

Epoch: 2, Loss: 2.7191107273101807


Processing epoch 01:  59%|█████▉    | 2858/4850 [13:40<09:36,  3.46it/s]

Epoch: 2, Loss: 2.372060775756836


Processing epoch 01:  59%|█████▉    | 2859/4850 [13:40<09:41,  3.42it/s]

Epoch: 2, Loss: 2.8568766117095947


Processing epoch 01:  59%|█████▉    | 2860/4850 [13:41<09:45,  3.40it/s]

Epoch: 2, Loss: 3.555745840072632


Processing epoch 01:  59%|█████▉    | 2861/4850 [13:41<09:41,  3.42it/s]

Epoch: 2, Loss: 2.3321614265441895


Processing epoch 01:  59%|█████▉    | 2862/4850 [13:41<09:54,  3.35it/s]

Epoch: 2, Loss: 2.438958168029785


Processing epoch 01:  59%|█████▉    | 2863/4850 [13:41<09:40,  3.43it/s]

Epoch: 2, Loss: 2.9044156074523926


Processing epoch 01:  59%|█████▉    | 2864/4850 [13:42<09:50,  3.37it/s]

Epoch: 2, Loss: 2.246286153793335


Processing epoch 01:  59%|█████▉    | 2865/4850 [13:42<09:53,  3.34it/s]

Epoch: 2, Loss: 2.926673412322998


Processing epoch 01:  59%|█████▉    | 2866/4850 [13:42<09:49,  3.37it/s]

Epoch: 2, Loss: 2.5233678817749023


Processing epoch 01:  59%|█████▉    | 2867/4850 [13:43<09:53,  3.34it/s]

Epoch: 2, Loss: 2.3015048503875732


Processing epoch 01:  59%|█████▉    | 2868/4850 [13:43<10:04,  3.28it/s]

Epoch: 2, Loss: 2.6485486030578613


Processing epoch 01:  59%|█████▉    | 2869/4850 [13:43<10:01,  3.30it/s]

Epoch: 2, Loss: 2.9312000274658203


Processing epoch 01:  59%|█████▉    | 2870/4850 [13:44<09:54,  3.33it/s]

Epoch: 2, Loss: 3.4426093101501465


Processing epoch 01:  59%|█████▉    | 2871/4850 [13:44<09:56,  3.32it/s]

Epoch: 2, Loss: 2.5993387699127197


Processing epoch 01:  59%|█████▉    | 2872/4850 [13:44<09:48,  3.36it/s]

Epoch: 2, Loss: 2.5800867080688477


Processing epoch 01:  59%|█████▉    | 2873/4850 [13:44<09:39,  3.41it/s]

Epoch: 2, Loss: 2.6541781425476074


Processing epoch 01:  59%|█████▉    | 2874/4850 [13:45<09:34,  3.44it/s]

Epoch: 2, Loss: 2.5899529457092285


Processing epoch 01:  59%|█████▉    | 2875/4850 [13:45<09:30,  3.46it/s]

Epoch: 2, Loss: 2.2806196212768555


Processing epoch 01:  59%|█████▉    | 2876/4850 [13:45<09:28,  3.47it/s]

Epoch: 2, Loss: 2.4183459281921387


Processing epoch 01:  59%|█████▉    | 2877/4850 [13:46<09:32,  3.45it/s]

Epoch: 2, Loss: 2.9173316955566406


Processing epoch 01:  59%|█████▉    | 2878/4850 [13:46<09:28,  3.47it/s]

Epoch: 2, Loss: 2.521697521209717


Processing epoch 01:  59%|█████▉    | 2879/4850 [13:46<09:26,  3.48it/s]

Epoch: 2, Loss: 2.3607873916625977


Processing epoch 01:  59%|█████▉    | 2880/4850 [13:46<09:22,  3.50it/s]

Epoch: 2, Loss: 2.2005887031555176


Processing epoch 01:  59%|█████▉    | 2881/4850 [13:47<09:21,  3.51it/s]

Epoch: 2, Loss: 2.579925775527954


Processing epoch 01:  59%|█████▉    | 2882/4850 [13:47<09:20,  3.51it/s]

Epoch: 2, Loss: 2.576416015625


Processing epoch 01:  59%|█████▉    | 2883/4850 [13:47<09:22,  3.49it/s]

Epoch: 2, Loss: 1.9164912700653076


Processing epoch 01:  59%|█████▉    | 2884/4850 [13:48<09:22,  3.49it/s]

Epoch: 2, Loss: 2.6245124340057373


Processing epoch 01:  59%|█████▉    | 2885/4850 [13:48<09:20,  3.51it/s]

Epoch: 2, Loss: 2.390137195587158


Processing epoch 01:  60%|█████▉    | 2886/4850 [13:48<09:22,  3.49it/s]

Epoch: 2, Loss: 2.753519058227539


Processing epoch 01:  60%|█████▉    | 2887/4850 [13:48<09:18,  3.52it/s]

Epoch: 2, Loss: 2.647550582885742


Processing epoch 01:  60%|█████▉    | 2888/4850 [13:49<09:16,  3.52it/s]

Epoch: 2, Loss: 2.473242998123169


Processing epoch 01:  60%|█████▉    | 2889/4850 [13:49<09:15,  3.53it/s]

Epoch: 2, Loss: 3.3060171604156494


Processing epoch 01:  60%|█████▉    | 2890/4850 [13:49<09:13,  3.54it/s]

Epoch: 2, Loss: 3.171574592590332


Processing epoch 01:  60%|█████▉    | 2891/4850 [13:50<09:14,  3.53it/s]

Epoch: 2, Loss: 2.8060643672943115


Processing epoch 01:  60%|█████▉    | 2892/4850 [13:50<09:16,  3.52it/s]

Epoch: 2, Loss: 2.4132542610168457


Processing epoch 01:  60%|█████▉    | 2893/4850 [13:50<09:15,  3.52it/s]

Epoch: 2, Loss: 2.428008556365967


Processing epoch 01:  60%|█████▉    | 2894/4850 [13:50<09:12,  3.54it/s]

Epoch: 2, Loss: 2.611748218536377


Processing epoch 01:  60%|█████▉    | 2895/4850 [13:51<09:15,  3.52it/s]

Epoch: 2, Loss: 2.701669216156006


Processing epoch 01:  60%|█████▉    | 2896/4850 [13:51<09:12,  3.54it/s]

Epoch: 2, Loss: 2.640249490737915


Processing epoch 01:  60%|█████▉    | 2897/4850 [13:51<09:12,  3.54it/s]

Epoch: 2, Loss: 2.7317698001861572


Processing epoch 01:  60%|█████▉    | 2898/4850 [13:52<09:07,  3.57it/s]

Epoch: 2, Loss: 4.245996952056885


Processing epoch 01:  60%|█████▉    | 2899/4850 [13:52<09:08,  3.56it/s]

Epoch: 2, Loss: 2.193119764328003


Processing epoch 01:  60%|█████▉    | 2900/4850 [13:52<09:11,  3.54it/s]

Epoch: 2, Loss: 2.7231943607330322


Processing epoch 01:  60%|█████▉    | 2901/4850 [13:52<09:11,  3.53it/s]

Epoch: 2, Loss: 2.250516414642334


Processing epoch 01:  60%|█████▉    | 2902/4850 [13:53<09:10,  3.54it/s]

Epoch: 2, Loss: 2.010284185409546


Processing epoch 01:  60%|█████▉    | 2903/4850 [13:53<09:09,  3.54it/s]

Epoch: 2, Loss: 2.40610933303833


Processing epoch 01:  60%|█████▉    | 2904/4850 [13:53<09:09,  3.54it/s]

Epoch: 2, Loss: 2.9775214195251465


Processing epoch 01:  60%|█████▉    | 2905/4850 [13:54<09:09,  3.54it/s]

Epoch: 2, Loss: 2.8607962131500244


Processing epoch 01:  60%|█████▉    | 2906/4850 [13:54<09:19,  3.48it/s]

Epoch: 2, Loss: 2.6372928619384766


Processing epoch 01:  60%|█████▉    | 2907/4850 [13:54<09:25,  3.44it/s]

Epoch: 2, Loss: 2.703828811645508


Processing epoch 01:  60%|█████▉    | 2908/4850 [13:54<09:32,  3.39it/s]

Epoch: 2, Loss: 2.5927505493164062


Processing epoch 01:  60%|█████▉    | 2909/4850 [13:55<09:28,  3.42it/s]

Epoch: 2, Loss: 2.265937566757202


Processing epoch 01:  60%|██████    | 2910/4850 [13:55<09:36,  3.37it/s]

Epoch: 2, Loss: 2.306209087371826


Processing epoch 01:  60%|██████    | 2911/4850 [13:55<09:39,  3.35it/s]

Epoch: 2, Loss: 2.5387282371520996


Processing epoch 01:  60%|██████    | 2912/4850 [13:56<09:39,  3.34it/s]

Epoch: 2, Loss: 2.269944190979004


Processing epoch 01:  60%|██████    | 2913/4850 [13:56<09:43,  3.32it/s]

Epoch: 2, Loss: 2.5814361572265625


Processing epoch 01:  60%|██████    | 2914/4850 [13:56<09:45,  3.31it/s]

Epoch: 2, Loss: 2.187429428100586


Processing epoch 01:  60%|██████    | 2915/4850 [13:57<09:40,  3.33it/s]

Epoch: 2, Loss: 2.8437609672546387


Processing epoch 01:  60%|██████    | 2916/4850 [13:57<09:38,  3.35it/s]

Epoch: 2, Loss: 2.6716926097869873


Processing epoch 01:  60%|██████    | 2917/4850 [13:57<09:45,  3.30it/s]

Epoch: 2, Loss: 2.1310501098632812


Processing epoch 01:  60%|██████    | 2918/4850 [13:57<09:40,  3.33it/s]

Epoch: 2, Loss: 2.378077507019043


Processing epoch 01:  60%|██████    | 2919/4850 [13:58<09:33,  3.37it/s]

Epoch: 2, Loss: 2.1064882278442383


Processing epoch 01:  60%|██████    | 2920/4850 [13:58<09:29,  3.39it/s]

Epoch: 2, Loss: 2.543832302093506


Processing epoch 01:  60%|██████    | 2921/4850 [13:58<09:29,  3.39it/s]

Epoch: 2, Loss: 2.3918633460998535


Processing epoch 01:  60%|██████    | 2922/4850 [13:59<09:31,  3.37it/s]

Epoch: 2, Loss: 2.840549945831299


Processing epoch 01:  60%|██████    | 2923/4850 [13:59<09:29,  3.39it/s]

Epoch: 2, Loss: 2.57694149017334


Processing epoch 01:  60%|██████    | 2924/4850 [13:59<09:24,  3.41it/s]

Epoch: 2, Loss: 2.2631516456604004


Processing epoch 01:  60%|██████    | 2925/4850 [13:59<09:21,  3.43it/s]

Epoch: 2, Loss: 2.2097864151000977


Processing epoch 01:  60%|██████    | 2926/4850 [14:00<09:13,  3.47it/s]

Epoch: 2, Loss: 2.771653175354004


Processing epoch 01:  60%|██████    | 2927/4850 [14:00<09:10,  3.49it/s]

Epoch: 2, Loss: 2.558532953262329


Processing epoch 01:  60%|██████    | 2928/4850 [14:00<09:07,  3.51it/s]

Epoch: 2, Loss: 2.250640392303467


Processing epoch 01:  60%|██████    | 2929/4850 [14:01<09:07,  3.51it/s]

Epoch: 2, Loss: 2.4070510864257812


Processing epoch 01:  60%|██████    | 2930/4850 [14:01<09:04,  3.53it/s]

Epoch: 2, Loss: 2.466313123703003


Processing epoch 01:  60%|██████    | 2931/4850 [14:01<09:10,  3.49it/s]

Epoch: 2, Loss: 2.593400478363037


Processing epoch 01:  60%|██████    | 2932/4850 [14:01<09:06,  3.51it/s]

Epoch: 2, Loss: 2.601121425628662


Processing epoch 01:  60%|██████    | 2933/4850 [14:02<09:02,  3.53it/s]

Epoch: 2, Loss: 2.1851491928100586


Processing epoch 01:  60%|██████    | 2934/4850 [14:02<09:01,  3.54it/s]

Epoch: 2, Loss: 2.4108967781066895


Processing epoch 01:  61%|██████    | 2935/4850 [14:02<09:00,  3.54it/s]

Epoch: 2, Loss: 2.8501110076904297


Processing epoch 01:  61%|██████    | 2936/4850 [14:03<09:00,  3.54it/s]

Epoch: 2, Loss: 2.6461105346679688


Processing epoch 01:  61%|██████    | 2937/4850 [14:03<09:00,  3.54it/s]

Epoch: 2, Loss: 2.503389835357666


Processing epoch 01:  61%|██████    | 2938/4850 [14:03<09:03,  3.52it/s]

Epoch: 2, Loss: 2.3713765144348145


Processing epoch 01:  61%|██████    | 2939/4850 [14:03<09:00,  3.53it/s]

Epoch: 2, Loss: 3.028079032897949


Processing epoch 01:  61%|██████    | 2940/4850 [14:04<09:01,  3.53it/s]

Epoch: 2, Loss: 2.7784459590911865


Processing epoch 01:  61%|██████    | 2941/4850 [14:04<09:02,  3.52it/s]

Epoch: 2, Loss: 2.3927197456359863


Processing epoch 01:  61%|██████    | 2942/4850 [14:04<09:12,  3.45it/s]

Epoch: 2, Loss: 2.8402726650238037


Processing epoch 01:  61%|██████    | 2943/4850 [14:05<09:08,  3.48it/s]

Epoch: 2, Loss: 2.448624610900879


Processing epoch 01:  61%|██████    | 2944/4850 [14:05<09:06,  3.49it/s]

Epoch: 2, Loss: 3.060884952545166


Processing epoch 01:  61%|██████    | 2945/4850 [14:05<09:07,  3.48it/s]

Epoch: 2, Loss: 2.5231637954711914


Processing epoch 01:  61%|██████    | 2946/4850 [14:05<09:03,  3.51it/s]

Epoch: 2, Loss: 2.841092109680176


Processing epoch 01:  61%|██████    | 2947/4850 [14:06<08:59,  3.53it/s]

Epoch: 2, Loss: 3.045950412750244


Processing epoch 01:  61%|██████    | 2948/4850 [14:06<09:01,  3.51it/s]

Epoch: 2, Loss: 2.4211864471435547


Processing epoch 01:  61%|██████    | 2949/4850 [14:06<09:00,  3.52it/s]

Epoch: 2, Loss: 2.8190627098083496


Processing epoch 01:  61%|██████    | 2950/4850 [14:07<08:58,  3.53it/s]

Epoch: 2, Loss: 2.648881196975708


Processing epoch 01:  61%|██████    | 2951/4850 [14:07<08:55,  3.55it/s]

Epoch: 2, Loss: 2.7559499740600586


Processing epoch 01:  61%|██████    | 2952/4850 [14:07<08:54,  3.55it/s]

Epoch: 2, Loss: 2.737508773803711


Processing epoch 01:  61%|██████    | 2953/4850 [14:07<09:02,  3.50it/s]

Epoch: 2, Loss: 2.288989543914795


Processing epoch 01:  61%|██████    | 2954/4850 [14:08<09:01,  3.50it/s]

Epoch: 2, Loss: 2.713195323944092


Processing epoch 01:  61%|██████    | 2955/4850 [14:08<08:57,  3.52it/s]

Epoch: 2, Loss: 2.0887322425842285


Processing epoch 01:  61%|██████    | 2956/4850 [14:08<08:54,  3.54it/s]

Epoch: 2, Loss: 3.5341994762420654


Processing epoch 01:  61%|██████    | 2957/4850 [14:09<08:56,  3.53it/s]

Epoch: 2, Loss: 2.447906017303467


Processing epoch 01:  61%|██████    | 2958/4850 [14:09<08:58,  3.51it/s]

Epoch: 2, Loss: 3.020669937133789


Processing epoch 01:  61%|██████    | 2959/4850 [14:09<09:08,  3.45it/s]

Epoch: 2, Loss: 2.829440116882324


Processing epoch 01:  61%|██████    | 2960/4850 [14:09<09:09,  3.44it/s]

Epoch: 2, Loss: 2.505819797515869


Processing epoch 01:  61%|██████    | 2961/4850 [14:10<09:24,  3.35it/s]

Epoch: 2, Loss: 2.9553794860839844


Processing epoch 01:  61%|██████    | 2962/4850 [14:10<09:22,  3.35it/s]

Epoch: 2, Loss: 2.819465160369873


Processing epoch 01:  61%|██████    | 2963/4850 [14:10<09:30,  3.31it/s]

Epoch: 2, Loss: 2.5321412086486816


Processing epoch 01:  61%|██████    | 2964/4850 [14:11<09:28,  3.31it/s]

Epoch: 2, Loss: 1.9471430778503418


Processing epoch 01:  61%|██████    | 2965/4850 [14:11<09:18,  3.38it/s]

Epoch: 2, Loss: 3.100264072418213


Processing epoch 01:  61%|██████    | 2966/4850 [14:11<09:34,  3.28it/s]

Epoch: 2, Loss: 2.1256282329559326


Processing epoch 01:  61%|██████    | 2967/4850 [14:12<09:31,  3.30it/s]

Epoch: 2, Loss: 2.7795162200927734


Processing epoch 01:  61%|██████    | 2968/4850 [14:12<09:29,  3.31it/s]

Epoch: 2, Loss: 2.1689603328704834


Processing epoch 01:  61%|██████    | 2969/4850 [14:12<09:22,  3.34it/s]

Epoch: 2, Loss: 2.430818557739258


Processing epoch 01:  61%|██████    | 2970/4850 [14:12<09:19,  3.36it/s]

Epoch: 2, Loss: 2.36081600189209


Processing epoch 01:  61%|██████▏   | 2971/4850 [14:13<09:20,  3.35it/s]

Epoch: 2, Loss: 2.3526883125305176


Processing epoch 01:  61%|██████▏   | 2972/4850 [14:13<09:19,  3.36it/s]

Epoch: 2, Loss: 3.3365111351013184


Processing epoch 01:  61%|██████▏   | 2973/4850 [14:13<09:20,  3.35it/s]

Epoch: 2, Loss: 3.5862064361572266


Processing epoch 01:  61%|██████▏   | 2974/4850 [14:14<09:27,  3.30it/s]

Epoch: 2, Loss: 2.6637418270111084


Processing epoch 01:  61%|██████▏   | 2975/4850 [14:14<09:18,  3.35it/s]

Epoch: 2, Loss: 2.3119895458221436


Processing epoch 01:  61%|██████▏   | 2976/4850 [14:14<09:12,  3.39it/s]

Epoch: 2, Loss: 2.014618396759033


Processing epoch 01:  61%|██████▏   | 2977/4850 [14:15<09:06,  3.43it/s]

Epoch: 2, Loss: 2.7005727291107178


Processing epoch 01:  61%|██████▏   | 2978/4850 [14:15<09:08,  3.41it/s]

Epoch: 2, Loss: 2.4316892623901367


Processing epoch 01:  61%|██████▏   | 2979/4850 [14:15<09:01,  3.45it/s]

Epoch: 2, Loss: 2.6557679176330566


Processing epoch 01:  61%|██████▏   | 2980/4850 [14:15<08:59,  3.47it/s]

Epoch: 2, Loss: 2.965266704559326


Processing epoch 01:  61%|██████▏   | 2981/4850 [14:16<08:56,  3.48it/s]

Epoch: 2, Loss: 2.3934707641601562


Processing epoch 01:  61%|██████▏   | 2982/4850 [14:16<08:55,  3.49it/s]

Epoch: 2, Loss: 2.228553056716919


Processing epoch 01:  62%|██████▏   | 2983/4850 [14:16<08:53,  3.50it/s]

Epoch: 2, Loss: 2.5299642086029053


Processing epoch 01:  62%|██████▏   | 2984/4850 [14:17<08:55,  3.49it/s]

Epoch: 2, Loss: 2.5264768600463867


Processing epoch 01:  62%|██████▏   | 2985/4850 [14:17<08:58,  3.47it/s]

Epoch: 2, Loss: 2.649021625518799


Processing epoch 01:  62%|██████▏   | 2986/4850 [14:17<08:54,  3.49it/s]

Epoch: 2, Loss: 2.185953378677368


Processing epoch 01:  62%|██████▏   | 2987/4850 [14:17<08:49,  3.52it/s]

Epoch: 2, Loss: 3.8150973320007324


Processing epoch 01:  62%|██████▏   | 2988/4850 [14:18<08:53,  3.49it/s]

Epoch: 2, Loss: 2.2860612869262695


Processing epoch 01:  62%|██████▏   | 2989/4850 [14:18<08:51,  3.50it/s]

Epoch: 2, Loss: 2.683305263519287


Processing epoch 01:  62%|██████▏   | 2990/4850 [14:18<08:54,  3.48it/s]

Epoch: 2, Loss: 2.3858399391174316


Processing epoch 01:  62%|██████▏   | 2991/4850 [14:19<08:49,  3.51it/s]

Epoch: 2, Loss: 2.266502857208252


Processing epoch 01:  62%|██████▏   | 2992/4850 [14:19<08:48,  3.51it/s]

Epoch: 2, Loss: 2.2082176208496094


Processing epoch 01:  62%|██████▏   | 2993/4850 [14:19<08:53,  3.48it/s]

Epoch: 2, Loss: 2.2580676078796387


Processing epoch 01:  62%|██████▏   | 2994/4850 [14:19<08:49,  3.50it/s]

Epoch: 2, Loss: 2.4437973499298096


Processing epoch 01:  62%|██████▏   | 2995/4850 [14:20<08:44,  3.54it/s]

Epoch: 2, Loss: 2.9279675483703613


Processing epoch 01:  62%|██████▏   | 2996/4850 [14:20<08:51,  3.49it/s]

Epoch: 2, Loss: 2.9591898918151855


Processing epoch 01:  62%|██████▏   | 2997/4850 [14:20<08:48,  3.51it/s]

Epoch: 2, Loss: 2.770616054534912


Processing epoch 01:  62%|██████▏   | 2998/4850 [14:21<08:44,  3.53it/s]

Epoch: 2, Loss: 3.0489211082458496


Processing epoch 01:  62%|██████▏   | 2999/4850 [14:21<09:01,  3.42it/s]

Epoch: 2, Loss: 2.3240702152252197


Processing epoch 01:  62%|██████▏   | 3000/4850 [14:21<08:56,  3.45it/s]

Epoch: 2, Loss: 2.7338061332702637


Processing epoch 01:  62%|██████▏   | 3001/4850 [14:21<08:52,  3.47it/s]

Epoch: 2, Loss: 2.1768600940704346


Processing epoch 01:  62%|██████▏   | 3002/4850 [14:22<08:55,  3.45it/s]

Epoch: 2, Loss: 2.6810855865478516


Processing epoch 01:  62%|██████▏   | 3003/4850 [14:22<08:55,  3.45it/s]

Epoch: 2, Loss: 2.8765339851379395


Processing epoch 01:  62%|██████▏   | 3004/4850 [14:22<08:51,  3.47it/s]

Epoch: 2, Loss: 2.3225162029266357


Processing epoch 01:  62%|██████▏   | 3005/4850 [14:23<08:47,  3.50it/s]

Epoch: 2, Loss: 2.536879539489746


Processing epoch 01:  62%|██████▏   | 3006/4850 [14:23<08:45,  3.51it/s]

Epoch: 2, Loss: 2.5121848583221436


Processing epoch 01:  62%|██████▏   | 3007/4850 [14:23<08:44,  3.51it/s]

Epoch: 2, Loss: 1.9684138298034668


Processing epoch 01:  62%|██████▏   | 3008/4850 [14:23<08:42,  3.52it/s]

Epoch: 2, Loss: 2.9695887565612793


Processing epoch 01:  62%|██████▏   | 3009/4850 [14:24<08:42,  3.52it/s]

Epoch: 2, Loss: 2.251171588897705


Processing epoch 01:  62%|██████▏   | 3010/4850 [14:24<08:53,  3.45it/s]

Epoch: 2, Loss: 2.0688700675964355


Processing epoch 01:  62%|██████▏   | 3011/4850 [14:24<08:54,  3.44it/s]

Epoch: 2, Loss: 2.4356861114501953


Processing epoch 01:  62%|██████▏   | 3012/4850 [14:25<08:53,  3.44it/s]

Epoch: 2, Loss: 3.1075682640075684


Processing epoch 01:  62%|██████▏   | 3013/4850 [14:25<08:55,  3.43it/s]

Epoch: 2, Loss: 2.4365339279174805


Processing epoch 01:  62%|██████▏   | 3014/4850 [14:25<09:00,  3.40it/s]

Epoch: 2, Loss: 2.4404101371765137


Processing epoch 01:  62%|██████▏   | 3015/4850 [14:25<08:54,  3.44it/s]

Epoch: 2, Loss: 2.2413172721862793


Processing epoch 01:  62%|██████▏   | 3016/4850 [14:26<08:47,  3.48it/s]

Epoch: 2, Loss: 2.6898303031921387


Processing epoch 01:  62%|██████▏   | 3017/4850 [14:26<08:51,  3.45it/s]

Epoch: 2, Loss: 2.2517426013946533


Processing epoch 01:  62%|██████▏   | 3018/4850 [14:26<08:51,  3.45it/s]

Epoch: 2, Loss: 2.569007396697998


Processing epoch 01:  62%|██████▏   | 3019/4850 [14:27<08:53,  3.43it/s]

Epoch: 2, Loss: 2.7331204414367676


Processing epoch 01:  62%|██████▏   | 3020/4850 [14:27<08:56,  3.41it/s]

Epoch: 2, Loss: 2.6850717067718506


Processing epoch 01:  62%|██████▏   | 3021/4850 [14:27<09:04,  3.36it/s]

Epoch: 2, Loss: 2.9079155921936035


Processing epoch 01:  62%|██████▏   | 3022/4850 [14:28<09:02,  3.37it/s]

Epoch: 2, Loss: 2.587761402130127


Processing epoch 01:  62%|██████▏   | 3023/4850 [14:28<09:00,  3.38it/s]

Epoch: 2, Loss: 2.7967662811279297


Processing epoch 01:  62%|██████▏   | 3024/4850 [14:28<09:10,  3.32it/s]

Epoch: 2, Loss: 2.424715042114258


Processing epoch 01:  62%|██████▏   | 3025/4850 [14:28<09:17,  3.27it/s]

Epoch: 2, Loss: 2.4805026054382324


Processing epoch 01:  62%|██████▏   | 3026/4850 [14:29<09:04,  3.35it/s]

Epoch: 2, Loss: 2.8313369750976562


Processing epoch 01:  62%|██████▏   | 3027/4850 [14:29<08:58,  3.39it/s]

Epoch: 2, Loss: 2.3066725730895996


Processing epoch 01:  62%|██████▏   | 3028/4850 [14:29<08:55,  3.40it/s]

Epoch: 2, Loss: 2.6559627056121826


Processing epoch 01:  62%|██████▏   | 3029/4850 [14:30<08:49,  3.44it/s]

Epoch: 2, Loss: 2.788856267929077


Processing epoch 01:  62%|██████▏   | 3030/4850 [14:30<08:46,  3.46it/s]

Epoch: 2, Loss: 2.5072007179260254


Processing epoch 01:  62%|██████▏   | 3031/4850 [14:30<08:46,  3.45it/s]

Epoch: 2, Loss: 2.128228187561035


Processing epoch 01:  63%|██████▎   | 3032/4850 [14:30<08:51,  3.42it/s]

Epoch: 2, Loss: 3.0571725368499756


Processing epoch 01:  63%|██████▎   | 3033/4850 [14:31<08:52,  3.41it/s]

Epoch: 2, Loss: 2.530569076538086


Processing epoch 01:  63%|██████▎   | 3034/4850 [14:31<08:46,  3.45it/s]

Epoch: 2, Loss: 2.665722370147705


Processing epoch 01:  63%|██████▎   | 3035/4850 [14:31<08:42,  3.47it/s]

Epoch: 2, Loss: 1.8901101350784302


Processing epoch 01:  63%|██████▎   | 3036/4850 [14:32<08:40,  3.48it/s]

Epoch: 2, Loss: 2.043201208114624


Processing epoch 01:  63%|██████▎   | 3037/4850 [14:32<08:39,  3.49it/s]

Epoch: 2, Loss: 2.9934616088867188


Processing epoch 01:  63%|██████▎   | 3038/4850 [14:32<08:37,  3.50it/s]

Epoch: 2, Loss: 2.3883817195892334


Processing epoch 01:  63%|██████▎   | 3039/4850 [14:32<08:40,  3.48it/s]

Epoch: 2, Loss: 1.9638895988464355


Processing epoch 01:  63%|██████▎   | 3040/4850 [14:33<08:37,  3.50it/s]

Epoch: 2, Loss: 2.3793718814849854


Processing epoch 01:  63%|██████▎   | 3041/4850 [14:33<08:35,  3.51it/s]

Epoch: 2, Loss: 2.5591483116149902


Processing epoch 01:  63%|██████▎   | 3042/4850 [14:33<08:42,  3.46it/s]

Epoch: 2, Loss: 2.968346118927002


Processing epoch 01:  63%|██████▎   | 3043/4850 [14:34<08:46,  3.43it/s]

Epoch: 2, Loss: 2.132827043533325


Processing epoch 01:  63%|██████▎   | 3044/4850 [14:34<08:47,  3.43it/s]

Epoch: 2, Loss: 2.176288366317749


Processing epoch 01:  63%|██████▎   | 3045/4850 [14:34<08:52,  3.39it/s]

Epoch: 2, Loss: 2.379838466644287


Processing epoch 01:  63%|██████▎   | 3046/4850 [14:35<08:56,  3.36it/s]

Epoch: 2, Loss: 2.655155897140503


Processing epoch 01:  63%|██████▎   | 3047/4850 [14:35<08:59,  3.34it/s]

Epoch: 2, Loss: 2.4964237213134766


Processing epoch 01:  63%|██████▎   | 3048/4850 [14:35<08:59,  3.34it/s]

Epoch: 2, Loss: 2.5109310150146484


Processing epoch 01:  63%|██████▎   | 3049/4850 [14:35<08:54,  3.37it/s]

Epoch: 2, Loss: 2.7881293296813965


Processing epoch 01:  63%|██████▎   | 3050/4850 [14:36<09:01,  3.33it/s]

Epoch: 2, Loss: 2.2412331104278564


Processing epoch 01:  63%|██████▎   | 3051/4850 [14:36<08:53,  3.37it/s]

Epoch: 2, Loss: 2.9756884574890137


Processing epoch 01:  63%|██████▎   | 3052/4850 [14:36<08:51,  3.38it/s]

Epoch: 2, Loss: 2.655374526977539


Processing epoch 01:  63%|██████▎   | 3053/4850 [14:37<08:50,  3.39it/s]

Epoch: 2, Loss: 3.811332941055298


Processing epoch 01:  63%|██████▎   | 3054/4850 [14:37<08:47,  3.41it/s]

Epoch: 2, Loss: 2.768610954284668


Processing epoch 01:  63%|██████▎   | 3055/4850 [14:37<08:54,  3.36it/s]

Epoch: 2, Loss: 2.85601544380188


Processing epoch 01:  63%|██████▎   | 3056/4850 [14:38<08:58,  3.33it/s]

Epoch: 2, Loss: 3.247474193572998


Processing epoch 01:  63%|██████▎   | 3057/4850 [14:38<08:55,  3.35it/s]

Epoch: 2, Loss: 2.6737895011901855


Processing epoch 01:  63%|██████▎   | 3058/4850 [14:38<08:47,  3.40it/s]

Epoch: 2, Loss: 2.2607133388519287


Processing epoch 01:  63%|██████▎   | 3059/4850 [14:38<08:44,  3.42it/s]

Epoch: 2, Loss: 2.285315990447998


Processing epoch 01:  63%|██████▎   | 3060/4850 [14:39<08:45,  3.41it/s]

Epoch: 2, Loss: 1.8019022941589355


Processing epoch 01:  63%|██████▎   | 3061/4850 [14:39<08:52,  3.36it/s]

Epoch: 2, Loss: 2.2754416465759277


Processing epoch 01:  63%|██████▎   | 3062/4850 [14:39<08:54,  3.34it/s]

Epoch: 2, Loss: 2.365417957305908


Processing epoch 01:  63%|██████▎   | 3063/4850 [14:40<08:47,  3.39it/s]

Epoch: 2, Loss: 2.4840052127838135


Processing epoch 01:  63%|██████▎   | 3064/4850 [14:40<08:51,  3.36it/s]

Epoch: 2, Loss: 2.466804265975952


Processing epoch 01:  63%|██████▎   | 3065/4850 [14:40<08:55,  3.33it/s]

Epoch: 2, Loss: 2.110830545425415


Processing epoch 01:  63%|██████▎   | 3066/4850 [14:40<08:51,  3.36it/s]

Epoch: 2, Loss: 2.5764617919921875


Processing epoch 01:  63%|██████▎   | 3067/4850 [14:41<08:50,  3.36it/s]

Epoch: 2, Loss: 1.954845905303955


Processing epoch 01:  63%|██████▎   | 3068/4850 [14:41<08:55,  3.33it/s]

Epoch: 2, Loss: 2.5618999004364014


Processing epoch 01:  63%|██████▎   | 3069/4850 [14:41<08:54,  3.33it/s]

Epoch: 2, Loss: 2.347762107849121


Processing epoch 01:  63%|██████▎   | 3070/4850 [14:42<08:55,  3.32it/s]

Epoch: 2, Loss: 2.062687397003174


Processing epoch 01:  63%|██████▎   | 3071/4850 [14:42<08:47,  3.37it/s]

Epoch: 2, Loss: 2.3570919036865234


Processing epoch 01:  63%|██████▎   | 3072/4850 [14:42<08:55,  3.32it/s]

Epoch: 2, Loss: 2.1570498943328857


Processing epoch 01:  63%|██████▎   | 3073/4850 [14:43<08:50,  3.35it/s]

Epoch: 2, Loss: 2.2400951385498047


Processing epoch 01:  63%|██████▎   | 3074/4850 [14:43<09:00,  3.28it/s]

Epoch: 2, Loss: 1.7957366704940796


Processing epoch 01:  63%|██████▎   | 3075/4850 [14:43<08:58,  3.30it/s]

Epoch: 2, Loss: 2.2451131343841553


Processing epoch 01:  63%|██████▎   | 3076/4850 [14:43<08:49,  3.35it/s]

Epoch: 2, Loss: 2.7701575756073


Processing epoch 01:  63%|██████▎   | 3077/4850 [14:44<08:45,  3.37it/s]

Epoch: 2, Loss: 3.1082446575164795


Processing epoch 01:  63%|██████▎   | 3078/4850 [14:44<08:39,  3.41it/s]

Epoch: 2, Loss: 1.7916889190673828


Processing epoch 01:  63%|██████▎   | 3079/4850 [14:44<08:36,  3.43it/s]

Epoch: 2, Loss: 2.8351078033447266


Processing epoch 01:  64%|██████▎   | 3080/4850 [14:45<08:29,  3.47it/s]

Epoch: 2, Loss: 2.655346155166626


Processing epoch 01:  64%|██████▎   | 3081/4850 [14:45<08:27,  3.48it/s]

Epoch: 2, Loss: 3.1887850761413574


Processing epoch 01:  64%|██████▎   | 3082/4850 [14:45<08:29,  3.47it/s]

Epoch: 2, Loss: 2.429725170135498


Processing epoch 01:  64%|██████▎   | 3083/4850 [14:45<08:26,  3.49it/s]

Epoch: 2, Loss: 2.023449659347534


Processing epoch 01:  64%|██████▎   | 3084/4850 [14:46<08:25,  3.49it/s]

Epoch: 2, Loss: 4.202298641204834


Processing epoch 01:  64%|██████▎   | 3085/4850 [14:46<08:24,  3.50it/s]

Epoch: 2, Loss: 2.50919771194458


Processing epoch 01:  64%|██████▎   | 3086/4850 [14:46<08:23,  3.50it/s]

Epoch: 2, Loss: 2.6464529037475586


Processing epoch 01:  64%|██████▎   | 3087/4850 [14:47<08:22,  3.51it/s]

Epoch: 2, Loss: 2.5573391914367676


Processing epoch 01:  64%|██████▎   | 3088/4850 [14:47<08:25,  3.49it/s]

Epoch: 2, Loss: 2.562026023864746


Processing epoch 01:  64%|██████▎   | 3089/4850 [14:47<08:22,  3.51it/s]

Epoch: 2, Loss: 2.417843818664551


Processing epoch 01:  64%|██████▎   | 3090/4850 [14:47<08:20,  3.52it/s]

Epoch: 2, Loss: 2.483642578125


Processing epoch 01:  64%|██████▎   | 3091/4850 [14:48<08:18,  3.53it/s]

Epoch: 2, Loss: 2.5130062103271484


Processing epoch 01:  64%|██████▍   | 3092/4850 [14:48<08:17,  3.53it/s]

Epoch: 2, Loss: 2.5493111610412598


Processing epoch 01:  64%|██████▍   | 3093/4850 [14:48<08:17,  3.53it/s]

Epoch: 2, Loss: 2.543003559112549


Processing epoch 01:  64%|██████▍   | 3094/4850 [14:49<08:18,  3.53it/s]

Epoch: 2, Loss: 2.6383955478668213


Processing epoch 01:  64%|██████▍   | 3095/4850 [14:49<08:19,  3.51it/s]

Epoch: 2, Loss: 2.109217643737793


Processing epoch 01:  64%|██████▍   | 3096/4850 [14:49<08:20,  3.51it/s]

Epoch: 2, Loss: 2.481806993484497


Processing epoch 01:  64%|██████▍   | 3097/4850 [14:49<08:18,  3.51it/s]

Epoch: 2, Loss: 2.53523325920105


Processing epoch 01:  64%|██████▍   | 3098/4850 [14:50<08:21,  3.49it/s]

Epoch: 2, Loss: 2.603424072265625


Processing epoch 01:  64%|██████▍   | 3099/4850 [14:50<08:20,  3.50it/s]

Epoch: 2, Loss: 2.3130266666412354


Processing epoch 01:  64%|██████▍   | 3100/4850 [14:50<08:20,  3.50it/s]

Epoch: 2, Loss: 2.5050206184387207


Processing epoch 01:  64%|██████▍   | 3101/4850 [14:51<08:19,  3.50it/s]

Epoch: 2, Loss: 2.5815987586975098


Processing epoch 01:  64%|██████▍   | 3102/4850 [14:51<08:19,  3.50it/s]

Epoch: 2, Loss: 2.8925392627716064


Processing epoch 01:  64%|██████▍   | 3103/4850 [14:51<08:18,  3.50it/s]

Epoch: 2, Loss: 2.8290696144104004


Processing epoch 01:  64%|██████▍   | 3104/4850 [14:51<08:21,  3.48it/s]

Epoch: 2, Loss: 2.3166327476501465


Processing epoch 01:  64%|██████▍   | 3105/4850 [14:52<08:20,  3.49it/s]

Epoch: 2, Loss: 2.2206246852874756


Processing epoch 01:  64%|██████▍   | 3106/4850 [14:52<08:20,  3.49it/s]

Epoch: 2, Loss: 2.334273099899292


Processing epoch 01:  64%|██████▍   | 3107/4850 [14:52<08:20,  3.48it/s]

Epoch: 2, Loss: 2.3108773231506348


Processing epoch 01:  64%|██████▍   | 3108/4850 [14:53<08:19,  3.49it/s]

Epoch: 2, Loss: 2.3395862579345703


Processing epoch 01:  64%|██████▍   | 3109/4850 [14:53<08:18,  3.49it/s]

Epoch: 2, Loss: 2.3742034435272217


Processing epoch 01:  64%|██████▍   | 3110/4850 [14:53<08:22,  3.46it/s]

Epoch: 2, Loss: 2.521749496459961


Processing epoch 01:  64%|██████▍   | 3111/4850 [14:53<08:20,  3.47it/s]

Epoch: 2, Loss: 2.57705020904541


Processing epoch 01:  64%|██████▍   | 3112/4850 [14:54<08:20,  3.47it/s]

Epoch: 2, Loss: 2.767669677734375


Processing epoch 01:  64%|██████▍   | 3113/4850 [14:54<08:20,  3.47it/s]

Epoch: 2, Loss: 2.2883405685424805


Processing epoch 01:  64%|██████▍   | 3114/4850 [14:54<08:19,  3.47it/s]

Epoch: 2, Loss: 2.2516367435455322


Processing epoch 01:  64%|██████▍   | 3115/4850 [14:55<08:25,  3.43it/s]

Epoch: 2, Loss: 2.497385263442993


Processing epoch 01:  64%|██████▍   | 3116/4850 [14:55<08:29,  3.41it/s]

Epoch: 2, Loss: 2.9178996086120605


Processing epoch 01:  64%|██████▍   | 3117/4850 [14:55<08:22,  3.45it/s]

Epoch: 2, Loss: 2.5277490615844727


Processing epoch 01:  64%|██████▍   | 3118/4850 [14:56<08:33,  3.37it/s]

Epoch: 2, Loss: 2.391005277633667


Processing epoch 01:  64%|██████▍   | 3119/4850 [14:56<08:35,  3.36it/s]

Epoch: 2, Loss: 2.529726982116699


Processing epoch 01:  64%|██████▍   | 3120/4850 [14:56<08:27,  3.41it/s]

Epoch: 2, Loss: 2.133643627166748


Processing epoch 01:  64%|██████▍   | 3121/4850 [14:56<08:24,  3.43it/s]

Epoch: 2, Loss: 2.3132357597351074


Processing epoch 01:  64%|██████▍   | 3122/4850 [14:57<08:21,  3.44it/s]

Epoch: 2, Loss: 1.9188005924224854


Processing epoch 01:  64%|██████▍   | 3123/4850 [14:57<08:17,  3.47it/s]

Epoch: 2, Loss: 2.4810235500335693


Processing epoch 01:  64%|██████▍   | 3124/4850 [14:57<08:15,  3.49it/s]

Epoch: 2, Loss: 2.6541852951049805


Processing epoch 01:  64%|██████▍   | 3125/4850 [14:58<08:16,  3.47it/s]

Epoch: 2, Loss: 3.363377332687378


Processing epoch 01:  64%|██████▍   | 3126/4850 [14:58<08:32,  3.36it/s]

Epoch: 2, Loss: 3.218852996826172


Processing epoch 01:  64%|██████▍   | 3127/4850 [14:58<08:25,  3.41it/s]

Epoch: 2, Loss: 2.100999116897583


Processing epoch 01:  64%|██████▍   | 3128/4850 [14:58<08:17,  3.46it/s]

Epoch: 2, Loss: 2.8120598793029785


Processing epoch 01:  65%|██████▍   | 3129/4850 [14:59<08:16,  3.47it/s]

Epoch: 2, Loss: 2.195355176925659


Processing epoch 01:  65%|██████▍   | 3130/4850 [14:59<08:14,  3.48it/s]

Epoch: 2, Loss: 2.3737306594848633


Processing epoch 01:  65%|██████▍   | 3131/4850 [14:59<08:10,  3.51it/s]

Epoch: 2, Loss: 2.354783535003662


Processing epoch 01:  65%|██████▍   | 3132/4850 [15:00<08:07,  3.52it/s]

Epoch: 2, Loss: 2.348655939102173


Processing epoch 01:  65%|██████▍   | 3133/4850 [15:00<08:06,  3.53it/s]

Epoch: 2, Loss: 4.079421043395996


Processing epoch 01:  65%|██████▍   | 3134/4850 [15:00<08:05,  3.54it/s]

Epoch: 2, Loss: 2.834831953048706


Processing epoch 01:  65%|██████▍   | 3135/4850 [15:00<08:03,  3.55it/s]

Epoch: 2, Loss: 2.754176616668701


Processing epoch 01:  65%|██████▍   | 3136/4850 [15:01<08:02,  3.55it/s]

Epoch: 2, Loss: 2.980971336364746


Processing epoch 01:  65%|██████▍   | 3137/4850 [15:01<08:05,  3.53it/s]

Epoch: 2, Loss: 3.115705966949463


Processing epoch 01:  65%|██████▍   | 3138/4850 [15:01<08:03,  3.54it/s]

Epoch: 2, Loss: 2.8098034858703613


Processing epoch 01:  65%|██████▍   | 3139/4850 [15:02<08:04,  3.53it/s]

Epoch: 2, Loss: 2.908597707748413


Processing epoch 01:  65%|██████▍   | 3140/4850 [15:02<08:05,  3.52it/s]

Epoch: 2, Loss: 2.7919187545776367


Processing epoch 01:  65%|██████▍   | 3141/4850 [15:02<08:07,  3.50it/s]

Epoch: 2, Loss: 2.3379619121551514


Processing epoch 01:  65%|██████▍   | 3142/4850 [15:02<08:07,  3.51it/s]

Epoch: 2, Loss: 2.468899726867676


Processing epoch 01:  65%|██████▍   | 3143/4850 [15:03<08:07,  3.50it/s]

Epoch: 2, Loss: 2.5704517364501953


Processing epoch 01:  65%|██████▍   | 3144/4850 [15:03<08:08,  3.50it/s]

Epoch: 2, Loss: 2.6116552352905273


Processing epoch 01:  65%|██████▍   | 3145/4850 [15:03<08:07,  3.50it/s]

Epoch: 2, Loss: 2.19142484664917


Processing epoch 01:  65%|██████▍   | 3146/4850 [15:04<08:06,  3.50it/s]

Epoch: 2, Loss: 2.6949849128723145


Processing epoch 01:  65%|██████▍   | 3147/4850 [15:04<08:02,  3.53it/s]

Epoch: 2, Loss: 3.14961314201355


Processing epoch 01:  65%|██████▍   | 3148/4850 [15:04<08:09,  3.48it/s]

Epoch: 2, Loss: 2.0061893463134766


Processing epoch 01:  65%|██████▍   | 3149/4850 [15:04<08:05,  3.50it/s]

Epoch: 2, Loss: 2.804229259490967


Processing epoch 01:  65%|██████▍   | 3150/4850 [15:05<08:02,  3.52it/s]

Epoch: 2, Loss: 2.842613458633423


Processing epoch 01:  65%|██████▍   | 3151/4850 [15:05<08:01,  3.53it/s]

Epoch: 2, Loss: 2.5926389694213867


Processing epoch 01:  65%|██████▍   | 3152/4850 [15:05<08:00,  3.53it/s]

Epoch: 2, Loss: 2.7292320728302


Processing epoch 01:  65%|██████▌   | 3153/4850 [15:06<07:58,  3.55it/s]

Epoch: 2, Loss: 2.6038413047790527


Processing epoch 01:  65%|██████▌   | 3154/4850 [15:06<07:59,  3.54it/s]

Epoch: 2, Loss: 3.2344048023223877


Processing epoch 01:  65%|██████▌   | 3155/4850 [15:06<08:00,  3.53it/s]

Epoch: 2, Loss: 2.6391661167144775


Processing epoch 01:  65%|██████▌   | 3156/4850 [15:06<07:59,  3.54it/s]

Epoch: 2, Loss: 2.4535317420959473


Processing epoch 01:  65%|██████▌   | 3157/4850 [15:07<07:58,  3.54it/s]

Epoch: 2, Loss: 2.4759936332702637


Processing epoch 01:  65%|██████▌   | 3158/4850 [15:07<07:55,  3.55it/s]

Epoch: 2, Loss: 2.71187686920166


Processing epoch 01:  65%|██████▌   | 3159/4850 [15:07<08:03,  3.50it/s]

Epoch: 2, Loss: 2.2801356315612793


Processing epoch 01:  65%|██████▌   | 3160/4850 [15:08<08:00,  3.52it/s]

Epoch: 2, Loss: 2.3199427127838135


Processing epoch 01:  65%|██████▌   | 3161/4850 [15:08<08:04,  3.48it/s]

Epoch: 2, Loss: 1.9063326120376587


Processing epoch 01:  65%|██████▌   | 3162/4850 [15:08<08:07,  3.46it/s]

Epoch: 2, Loss: 2.4306704998016357


Processing epoch 01:  65%|██████▌   | 3163/4850 [15:08<08:15,  3.41it/s]

Epoch: 2, Loss: 2.8344082832336426


Processing epoch 01:  65%|██████▌   | 3164/4850 [15:09<08:25,  3.33it/s]

Epoch: 2, Loss: 2.4624156951904297


Processing epoch 01:  65%|██████▌   | 3165/4850 [15:09<08:33,  3.28it/s]

Epoch: 2, Loss: 2.384838104248047


Processing epoch 01:  65%|██████▌   | 3166/4850 [15:09<08:35,  3.26it/s]

Epoch: 2, Loss: 2.4346847534179688


Processing epoch 01:  65%|██████▌   | 3167/4850 [15:10<08:24,  3.34it/s]

Epoch: 2, Loss: 3.1579549312591553


Processing epoch 01:  65%|██████▌   | 3168/4850 [15:10<08:17,  3.38it/s]

Epoch: 2, Loss: 2.773352861404419


Processing epoch 01:  65%|██████▌   | 3169/4850 [15:10<08:23,  3.34it/s]

Epoch: 2, Loss: 2.381802558898926


Processing epoch 01:  65%|██████▌   | 3170/4850 [15:11<08:25,  3.32it/s]

Epoch: 2, Loss: 2.56477689743042


Processing epoch 01:  65%|██████▌   | 3171/4850 [15:11<08:21,  3.35it/s]

Epoch: 2, Loss: 2.3661789894104004


Processing epoch 01:  65%|██████▌   | 3172/4850 [15:11<08:22,  3.34it/s]

Epoch: 2, Loss: 2.618407726287842


Processing epoch 01:  65%|██████▌   | 3173/4850 [15:11<08:22,  3.34it/s]

Epoch: 2, Loss: 2.2838246822357178


Processing epoch 01:  65%|██████▌   | 3174/4850 [15:12<08:18,  3.36it/s]

Epoch: 2, Loss: 2.870861053466797


Processing epoch 01:  65%|██████▌   | 3175/4850 [15:12<08:13,  3.39it/s]

Epoch: 2, Loss: 2.3182878494262695


Processing epoch 01:  65%|██████▌   | 3176/4850 [15:12<08:16,  3.37it/s]

Epoch: 2, Loss: 2.6600708961486816


Processing epoch 01:  66%|██████▌   | 3177/4850 [15:13<08:21,  3.34it/s]

Epoch: 2, Loss: 1.9053990840911865


Processing epoch 01:  66%|██████▌   | 3178/4850 [15:13<08:18,  3.35it/s]

Epoch: 2, Loss: 2.2564313411712646


Processing epoch 01:  66%|██████▌   | 3179/4850 [15:13<08:11,  3.40it/s]

Epoch: 2, Loss: 2.3024814128875732


Processing epoch 01:  66%|██████▌   | 3180/4850 [15:13<08:04,  3.44it/s]

Epoch: 2, Loss: 3.6038591861724854


Processing epoch 01:  66%|██████▌   | 3181/4850 [15:14<08:02,  3.46it/s]

Epoch: 2, Loss: 2.787893772125244


Processing epoch 01:  66%|██████▌   | 3182/4850 [15:14<08:00,  3.47it/s]

Epoch: 2, Loss: 2.4514217376708984


Processing epoch 01:  66%|██████▌   | 3183/4850 [15:14<07:58,  3.49it/s]

Epoch: 2, Loss: 2.7261033058166504


Processing epoch 01:  66%|██████▌   | 3184/4850 [15:15<08:04,  3.44it/s]

Epoch: 2, Loss: 2.0019233226776123


Processing epoch 01:  66%|██████▌   | 3185/4850 [15:15<08:02,  3.45it/s]

Epoch: 2, Loss: 2.3448431491851807


Processing epoch 01:  66%|██████▌   | 3186/4850 [15:15<07:58,  3.48it/s]

Epoch: 2, Loss: 2.7286758422851562


Processing epoch 01:  66%|██████▌   | 3187/4850 [15:15<07:55,  3.50it/s]

Epoch: 2, Loss: 2.281805992126465


Processing epoch 01:  66%|██████▌   | 3188/4850 [15:16<07:53,  3.51it/s]

Epoch: 2, Loss: 3.159818172454834


Processing epoch 01:  66%|██████▌   | 3189/4850 [15:16<07:54,  3.50it/s]

Epoch: 2, Loss: 2.6241955757141113


Processing epoch 01:  66%|██████▌   | 3190/4850 [15:16<07:52,  3.51it/s]

Epoch: 2, Loss: 2.1982624530792236


Processing epoch 01:  66%|██████▌   | 3191/4850 [15:17<07:51,  3.52it/s]

Epoch: 2, Loss: 2.671029806137085


Processing epoch 01:  66%|██████▌   | 3192/4850 [15:17<07:50,  3.52it/s]

Epoch: 2, Loss: 2.147519588470459


Processing epoch 01:  66%|██████▌   | 3193/4850 [15:17<07:49,  3.53it/s]

Epoch: 2, Loss: 2.655900001525879


Processing epoch 01:  66%|██████▌   | 3194/4850 [15:17<07:50,  3.52it/s]

Epoch: 2, Loss: 2.868436813354492


Processing epoch 01:  66%|██████▌   | 3195/4850 [15:18<07:56,  3.47it/s]

Epoch: 2, Loss: 2.489140033721924


Processing epoch 01:  66%|██████▌   | 3196/4850 [15:18<07:56,  3.47it/s]

Epoch: 2, Loss: 2.335705518722534


Processing epoch 01:  66%|██████▌   | 3197/4850 [15:18<07:53,  3.49it/s]

Epoch: 2, Loss: 1.9561630487442017


Processing epoch 01:  66%|██████▌   | 3198/4850 [15:19<07:50,  3.51it/s]

Epoch: 2, Loss: 2.7445411682128906


Processing epoch 01:  66%|██████▌   | 3199/4850 [15:19<07:53,  3.49it/s]

Epoch: 2, Loss: 2.838139295578003


Processing epoch 01:  66%|██████▌   | 3200/4850 [15:19<07:54,  3.48it/s]

Epoch: 2, Loss: 2.3413543701171875


Processing epoch 01:  66%|██████▌   | 3201/4850 [15:19<07:49,  3.51it/s]

Epoch: 2, Loss: 2.5020530223846436


Processing epoch 01:  66%|██████▌   | 3202/4850 [15:20<07:48,  3.52it/s]

Epoch: 2, Loss: 2.643521308898926


Processing epoch 01:  66%|██████▌   | 3203/4850 [15:20<07:48,  3.52it/s]

Epoch: 2, Loss: 2.2704577445983887


Processing epoch 01:  66%|██████▌   | 3204/4850 [15:20<07:45,  3.54it/s]

Epoch: 2, Loss: 2.246680736541748


Processing epoch 01:  66%|██████▌   | 3205/4850 [15:21<07:45,  3.53it/s]

Epoch: 2, Loss: 2.641411542892456


Processing epoch 01:  66%|██████▌   | 3206/4850 [15:21<07:54,  3.47it/s]

Epoch: 2, Loss: 2.148225784301758


Processing epoch 01:  66%|██████▌   | 3207/4850 [15:21<07:51,  3.48it/s]

Epoch: 2, Loss: 3.2184629440307617


Processing epoch 01:  66%|██████▌   | 3208/4850 [15:21<07:49,  3.50it/s]

Epoch: 2, Loss: 3.4217448234558105


Processing epoch 01:  66%|██████▌   | 3209/4850 [15:22<07:49,  3.50it/s]

Epoch: 2, Loss: 1.779374361038208


Processing epoch 01:  66%|██████▌   | 3210/4850 [15:22<07:47,  3.51it/s]

Epoch: 2, Loss: 2.4777255058288574


Processing epoch 01:  66%|██████▌   | 3211/4850 [15:22<07:47,  3.51it/s]

Epoch: 2, Loss: 2.647904872894287


Processing epoch 01:  66%|██████▌   | 3212/4850 [15:23<07:45,  3.52it/s]

Epoch: 2, Loss: 2.783809185028076


Processing epoch 01:  66%|██████▌   | 3213/4850 [15:23<07:57,  3.43it/s]

Epoch: 2, Loss: 2.5427074432373047


Processing epoch 01:  66%|██████▋   | 3214/4850 [15:23<08:00,  3.41it/s]

Epoch: 2, Loss: 2.313894510269165


Processing epoch 01:  66%|██████▋   | 3215/4850 [15:24<07:59,  3.41it/s]

Epoch: 2, Loss: 2.5974292755126953


Processing epoch 01:  66%|██████▋   | 3216/4850 [15:24<08:07,  3.35it/s]

Epoch: 2, Loss: 2.3687267303466797


Processing epoch 01:  66%|██████▋   | 3217/4850 [15:24<08:07,  3.35it/s]

Epoch: 2, Loss: 2.631974458694458


Processing epoch 01:  66%|██████▋   | 3218/4850 [15:24<08:08,  3.34it/s]

Epoch: 2, Loss: 2.3034825325012207


Processing epoch 01:  66%|██████▋   | 3219/4850 [15:25<08:01,  3.39it/s]

Epoch: 2, Loss: 2.9597833156585693


Processing epoch 01:  66%|██████▋   | 3220/4850 [15:25<08:03,  3.37it/s]

Epoch: 2, Loss: 2.574962615966797


Processing epoch 01:  66%|██████▋   | 3221/4850 [15:25<08:05,  3.35it/s]

Epoch: 2, Loss: 2.898937225341797


Processing epoch 01:  66%|██████▋   | 3222/4850 [15:26<08:04,  3.36it/s]

Epoch: 2, Loss: 2.6957314014434814


Processing epoch 01:  66%|██████▋   | 3223/4850 [15:26<07:58,  3.40it/s]

Epoch: 2, Loss: 2.6030421257019043


Processing epoch 01:  66%|██████▋   | 3224/4850 [15:26<08:06,  3.34it/s]

Epoch: 2, Loss: 2.3561415672302246


Processing epoch 01:  66%|██████▋   | 3225/4850 [15:27<08:07,  3.34it/s]

Epoch: 2, Loss: 2.8398561477661133


Processing epoch 01:  67%|██████▋   | 3226/4850 [15:27<08:07,  3.33it/s]

Epoch: 2, Loss: 2.7478513717651367


Processing epoch 01:  67%|██████▋   | 3227/4850 [15:27<08:10,  3.31it/s]

Epoch: 2, Loss: 2.1103644371032715


Processing epoch 01:  67%|██████▋   | 3228/4850 [15:27<08:07,  3.32it/s]

Epoch: 2, Loss: 2.191340446472168


Processing epoch 01:  67%|██████▋   | 3229/4850 [15:28<08:00,  3.38it/s]

Epoch: 2, Loss: 2.4169960021972656


Processing epoch 01:  67%|██████▋   | 3230/4850 [15:28<07:53,  3.42it/s]

Epoch: 2, Loss: 2.842703342437744


Processing epoch 01:  67%|██████▋   | 3231/4850 [15:28<07:49,  3.45it/s]

Epoch: 2, Loss: 3.101221799850464


Processing epoch 01:  67%|██████▋   | 3232/4850 [15:29<07:51,  3.43it/s]

Epoch: 2, Loss: 2.331986904144287


Processing epoch 01:  67%|██████▋   | 3233/4850 [15:29<07:50,  3.44it/s]

Epoch: 2, Loss: 2.3742382526397705


Processing epoch 01:  67%|██████▋   | 3234/4850 [15:29<07:45,  3.47it/s]

Epoch: 2, Loss: 2.9888346195220947


Processing epoch 01:  67%|██████▋   | 3235/4850 [15:29<07:42,  3.49it/s]

Epoch: 2, Loss: 2.4744582176208496


Processing epoch 01:  67%|██████▋   | 3236/4850 [15:30<07:41,  3.49it/s]

Epoch: 2, Loss: 2.1092774868011475


Processing epoch 01:  67%|██████▋   | 3237/4850 [15:30<07:40,  3.50it/s]

Epoch: 2, Loss: 2.118350028991699


Processing epoch 01:  67%|██████▋   | 3238/4850 [15:30<07:42,  3.48it/s]

Epoch: 2, Loss: 2.36074161529541


Processing epoch 01:  67%|██████▋   | 3239/4850 [15:31<07:40,  3.50it/s]

Epoch: 2, Loss: 2.73447847366333


Processing epoch 01:  67%|██████▋   | 3240/4850 [15:31<07:43,  3.48it/s]

Epoch: 2, Loss: 2.141005754470825


Processing epoch 01:  67%|██████▋   | 3241/4850 [15:31<07:41,  3.49it/s]

Epoch: 2, Loss: 2.470195770263672


Processing epoch 01:  67%|██████▋   | 3242/4850 [15:31<07:40,  3.49it/s]

Epoch: 2, Loss: 2.3225414752960205


Processing epoch 01:  67%|██████▋   | 3243/4850 [15:32<07:42,  3.48it/s]

Epoch: 2, Loss: 2.6438546180725098


Processing epoch 01:  67%|██████▋   | 3244/4850 [15:32<07:40,  3.49it/s]

Epoch: 2, Loss: 2.132327079772949


Processing epoch 01:  67%|██████▋   | 3245/4850 [15:32<07:40,  3.49it/s]

Epoch: 2, Loss: 2.5089774131774902


Processing epoch 01:  67%|██████▋   | 3246/4850 [15:33<07:40,  3.49it/s]

Epoch: 2, Loss: 2.557173728942871


Processing epoch 01:  67%|██████▋   | 3247/4850 [15:33<07:38,  3.50it/s]

Epoch: 2, Loss: 2.5064163208007812


Processing epoch 01:  67%|██████▋   | 3248/4850 [15:33<07:36,  3.51it/s]

Epoch: 2, Loss: 2.3134355545043945


Processing epoch 01:  67%|██████▋   | 3249/4850 [15:33<07:42,  3.46it/s]

Epoch: 2, Loss: 2.354355812072754


Processing epoch 01:  67%|██████▋   | 3250/4850 [15:34<07:39,  3.48it/s]

Epoch: 2, Loss: 2.422090530395508


Processing epoch 01:  67%|██████▋   | 3251/4850 [15:34<07:38,  3.49it/s]

Epoch: 2, Loss: 2.358457326889038


Processing epoch 01:  67%|██████▋   | 3252/4850 [15:34<07:37,  3.49it/s]

Epoch: 2, Loss: 2.362847328186035


Processing epoch 01:  67%|██████▋   | 3253/4850 [15:35<07:37,  3.49it/s]

Epoch: 2, Loss: 2.6634368896484375


Processing epoch 01:  67%|██████▋   | 3254/4850 [15:35<07:38,  3.48it/s]

Epoch: 2, Loss: 2.568678855895996


Processing epoch 01:  67%|██████▋   | 3255/4850 [15:35<07:36,  3.49it/s]

Epoch: 2, Loss: 3.604832649230957


Processing epoch 01:  67%|██████▋   | 3256/4850 [15:35<07:34,  3.51it/s]

Epoch: 2, Loss: 2.520084857940674


Processing epoch 01:  67%|██████▋   | 3257/4850 [15:36<07:33,  3.51it/s]

Epoch: 2, Loss: 2.135249137878418


Processing epoch 01:  67%|██████▋   | 3258/4850 [15:36<07:31,  3.52it/s]

Epoch: 2, Loss: 2.4841957092285156


Processing epoch 01:  67%|██████▋   | 3259/4850 [15:36<07:29,  3.54it/s]

Epoch: 2, Loss: 3.2714200019836426


Processing epoch 01:  67%|██████▋   | 3260/4850 [15:37<07:31,  3.52it/s]

Epoch: 2, Loss: 2.473322868347168


Processing epoch 01:  67%|██████▋   | 3261/4850 [15:37<07:30,  3.52it/s]

Epoch: 2, Loss: 2.7434918880462646


Processing epoch 01:  67%|██████▋   | 3262/4850 [15:37<07:30,  3.53it/s]

Epoch: 2, Loss: 2.187450885772705


Processing epoch 01:  67%|██████▋   | 3263/4850 [15:37<07:35,  3.49it/s]

Epoch: 2, Loss: 2.85062575340271


Processing epoch 01:  67%|██████▋   | 3264/4850 [15:38<07:43,  3.42it/s]

Epoch: 2, Loss: 2.560873508453369


Processing epoch 01:  67%|██████▋   | 3265/4850 [15:38<07:41,  3.43it/s]

Epoch: 2, Loss: 2.084110736846924


Processing epoch 01:  67%|██████▋   | 3266/4850 [15:38<07:38,  3.46it/s]

Epoch: 2, Loss: 3.161877393722534


Processing epoch 01:  67%|██████▋   | 3267/4850 [15:39<07:43,  3.41it/s]

Epoch: 2, Loss: 2.6083879470825195


Processing epoch 01:  67%|██████▋   | 3268/4850 [15:39<07:47,  3.38it/s]

Epoch: 2, Loss: 2.4238269329071045


Processing epoch 01:  67%|██████▋   | 3269/4850 [15:39<07:44,  3.41it/s]

Epoch: 2, Loss: 2.60862398147583


Processing epoch 01:  67%|██████▋   | 3270/4850 [15:40<07:50,  3.36it/s]

Epoch: 2, Loss: 2.1148171424865723


Processing epoch 01:  67%|██████▋   | 3271/4850 [15:40<07:45,  3.39it/s]

Epoch: 2, Loss: 2.4599719047546387


Processing epoch 01:  67%|██████▋   | 3272/4850 [15:40<07:48,  3.37it/s]

Epoch: 2, Loss: 2.6496739387512207


Processing epoch 01:  67%|██████▋   | 3273/4850 [15:40<07:57,  3.30it/s]

Epoch: 2, Loss: 2.5394599437713623


Processing epoch 01:  68%|██████▊   | 3274/4850 [15:41<07:53,  3.33it/s]

Epoch: 2, Loss: 3.085165023803711


Processing epoch 01:  68%|██████▊   | 3275/4850 [15:41<07:48,  3.36it/s]

Epoch: 2, Loss: 2.1625707149505615


Processing epoch 01:  68%|██████▊   | 3276/4850 [15:41<07:47,  3.37it/s]

Epoch: 2, Loss: 2.551769256591797


Processing epoch 01:  68%|██████▊   | 3277/4850 [15:42<07:49,  3.35it/s]

Epoch: 2, Loss: 2.2849931716918945


Processing epoch 01:  68%|██████▊   | 3278/4850 [15:42<07:50,  3.34it/s]

Epoch: 2, Loss: 2.778878688812256


Processing epoch 01:  68%|██████▊   | 3279/4850 [15:42<07:43,  3.39it/s]

Epoch: 2, Loss: 2.2895894050598145


Processing epoch 01:  68%|██████▊   | 3280/4850 [15:42<07:39,  3.42it/s]

Epoch: 2, Loss: 2.7800025939941406


Processing epoch 01:  68%|██████▊   | 3281/4850 [15:43<07:34,  3.45it/s]

Epoch: 2, Loss: 2.7016725540161133


Processing epoch 01:  68%|██████▊   | 3282/4850 [15:43<07:32,  3.47it/s]

Epoch: 2, Loss: 2.4107117652893066


Processing epoch 01:  68%|██████▊   | 3283/4850 [15:43<07:26,  3.51it/s]

Epoch: 2, Loss: 2.951827049255371


Processing epoch 01:  68%|██████▊   | 3284/4850 [15:44<07:27,  3.50it/s]

Epoch: 2, Loss: 2.400650978088379


Processing epoch 01:  68%|██████▊   | 3285/4850 [15:44<07:31,  3.47it/s]

Epoch: 2, Loss: 2.3639564514160156


Processing epoch 01:  68%|██████▊   | 3286/4850 [15:44<07:28,  3.48it/s]

Epoch: 2, Loss: 2.283747673034668


Processing epoch 01:  68%|██████▊   | 3287/4850 [15:44<07:27,  3.50it/s]

Epoch: 2, Loss: 2.720228672027588


Processing epoch 01:  68%|██████▊   | 3288/4850 [15:45<07:29,  3.48it/s]

Epoch: 2, Loss: 2.489819288253784


Processing epoch 01:  68%|██████▊   | 3289/4850 [15:45<07:26,  3.50it/s]

Epoch: 2, Loss: 2.5039265155792236


Processing epoch 01:  68%|██████▊   | 3290/4850 [15:45<07:25,  3.50it/s]

Epoch: 2, Loss: 2.0360023975372314


Processing epoch 01:  68%|██████▊   | 3291/4850 [15:46<07:23,  3.52it/s]

Epoch: 2, Loss: 2.2384278774261475


Processing epoch 01:  68%|██████▊   | 3292/4850 [15:46<07:24,  3.50it/s]

Epoch: 2, Loss: 2.0659608840942383


Processing epoch 01:  68%|██████▊   | 3293/4850 [15:46<07:23,  3.51it/s]

Epoch: 2, Loss: 2.399745464324951


Processing epoch 01:  68%|██████▊   | 3294/4850 [15:46<07:21,  3.52it/s]

Epoch: 2, Loss: 3.294238567352295


Processing epoch 01:  68%|██████▊   | 3295/4850 [15:47<07:21,  3.52it/s]

Epoch: 2, Loss: 2.5200719833374023


Processing epoch 01:  68%|██████▊   | 3296/4850 [15:47<07:21,  3.52it/s]

Epoch: 2, Loss: 2.6554722785949707


Processing epoch 01:  68%|██████▊   | 3297/4850 [15:47<07:21,  3.52it/s]

Epoch: 2, Loss: 2.72725510597229


Processing epoch 01:  68%|██████▊   | 3298/4850 [15:48<07:20,  3.52it/s]

Epoch: 2, Loss: 2.5435054302215576


Processing epoch 01:  68%|██████▊   | 3299/4850 [15:48<07:19,  3.53it/s]

Epoch: 2, Loss: 2.99458646774292


Processing epoch 01:  68%|██████▊   | 3300/4850 [15:48<07:17,  3.54it/s]

Epoch: 2, Loss: 2.477438449859619


Processing epoch 01:  68%|██████▊   | 3301/4850 [15:48<07:17,  3.54it/s]

Epoch: 2, Loss: 2.202977180480957


Processing epoch 01:  68%|██████▊   | 3302/4850 [15:49<07:17,  3.54it/s]

Epoch: 2, Loss: 2.6682019233703613


Processing epoch 01:  68%|██████▊   | 3303/4850 [15:49<07:20,  3.51it/s]

Epoch: 2, Loss: 2.4424707889556885


Processing epoch 01:  68%|██████▊   | 3304/4850 [15:49<07:20,  3.51it/s]

Epoch: 2, Loss: 2.5850887298583984


Processing epoch 01:  68%|██████▊   | 3305/4850 [15:50<07:17,  3.53it/s]

Epoch: 2, Loss: 2.643664836883545


Processing epoch 01:  68%|██████▊   | 3306/4850 [15:50<07:17,  3.53it/s]

Epoch: 2, Loss: 2.1320276260375977


Processing epoch 01:  68%|██████▊   | 3307/4850 [15:50<07:23,  3.48it/s]

Epoch: 2, Loss: 2.482511043548584


Processing epoch 01:  68%|██████▊   | 3308/4850 [15:50<07:20,  3.50it/s]

Epoch: 2, Loss: 2.6560707092285156


Processing epoch 01:  68%|██████▊   | 3309/4850 [15:51<07:18,  3.51it/s]

Epoch: 2, Loss: 2.427809238433838


Processing epoch 01:  68%|██████▊   | 3310/4850 [15:51<07:18,  3.51it/s]

Epoch: 2, Loss: 2.5385589599609375


Processing epoch 01:  68%|██████▊   | 3311/4850 [15:51<07:17,  3.52it/s]

Epoch: 2, Loss: 2.610769271850586


Processing epoch 01:  68%|██████▊   | 3312/4850 [15:52<07:17,  3.52it/s]

Epoch: 2, Loss: 1.9089434146881104


Processing epoch 01:  68%|██████▊   | 3313/4850 [15:52<07:18,  3.51it/s]

Epoch: 2, Loss: 1.8933708667755127


Processing epoch 01:  68%|██████▊   | 3314/4850 [15:52<07:21,  3.48it/s]

Epoch: 2, Loss: 2.451101303100586


Processing epoch 01:  68%|██████▊   | 3315/4850 [15:52<07:18,  3.50it/s]

Epoch: 2, Loss: 2.512248992919922


Processing epoch 01:  68%|██████▊   | 3316/4850 [15:53<07:20,  3.48it/s]

Epoch: 2, Loss: 2.361218214035034


Processing epoch 01:  68%|██████▊   | 3317/4850 [15:53<07:21,  3.48it/s]

Epoch: 2, Loss: 2.5563478469848633


Processing epoch 01:  68%|██████▊   | 3318/4850 [15:53<07:25,  3.44it/s]

Epoch: 2, Loss: 2.6871588230133057


Processing epoch 01:  68%|██████▊   | 3319/4850 [15:54<07:24,  3.45it/s]

Epoch: 2, Loss: 2.212113618850708


Processing epoch 01:  68%|██████▊   | 3320/4850 [15:54<07:30,  3.40it/s]

Epoch: 2, Loss: 2.604360580444336


Processing epoch 01:  68%|██████▊   | 3321/4850 [15:54<07:35,  3.36it/s]

Epoch: 2, Loss: 3.0147464275360107


Processing epoch 01:  68%|██████▊   | 3322/4850 [15:55<07:36,  3.35it/s]

Epoch: 2, Loss: 2.3867316246032715


Processing epoch 01:  69%|██████▊   | 3323/4850 [15:55<07:38,  3.33it/s]

Epoch: 2, Loss: 2.3646538257598877


Processing epoch 01:  69%|██████▊   | 3324/4850 [15:55<07:33,  3.36it/s]

Epoch: 2, Loss: 2.549734115600586


Processing epoch 01:  69%|██████▊   | 3325/4850 [15:55<07:36,  3.34it/s]

Epoch: 2, Loss: 2.404216766357422


Processing epoch 01:  69%|██████▊   | 3326/4850 [15:56<07:41,  3.30it/s]

Epoch: 2, Loss: 2.460395336151123


Processing epoch 01:  69%|██████▊   | 3327/4850 [15:56<07:40,  3.31it/s]

Epoch: 2, Loss: 2.483382225036621


Processing epoch 01:  69%|██████▊   | 3328/4850 [15:56<07:42,  3.29it/s]

Epoch: 2, Loss: 1.937608242034912


Processing epoch 01:  69%|██████▊   | 3329/4850 [15:57<07:44,  3.27it/s]

Epoch: 2, Loss: 2.2549266815185547


Processing epoch 01:  69%|██████▊   | 3330/4850 [15:57<07:34,  3.35it/s]

Epoch: 2, Loss: 2.3230011463165283


Processing epoch 01:  69%|██████▊   | 3331/4850 [15:57<07:29,  3.38it/s]

Epoch: 2, Loss: 2.246187686920166


Processing epoch 01:  69%|██████▊   | 3332/4850 [15:58<07:33,  3.35it/s]

Epoch: 2, Loss: 2.6979219913482666


Processing epoch 01:  69%|██████▊   | 3333/4850 [15:58<07:26,  3.40it/s]

Epoch: 2, Loss: 2.813467502593994


Processing epoch 01:  69%|██████▊   | 3334/4850 [15:58<07:17,  3.47it/s]

Epoch: 2, Loss: 2.959040880203247


Processing epoch 01:  69%|██████▉   | 3335/4850 [15:58<07:15,  3.48it/s]

Epoch: 2, Loss: 2.8827834129333496


Processing epoch 01:  69%|██████▉   | 3336/4850 [15:59<07:12,  3.50it/s]

Epoch: 2, Loss: 2.4624128341674805


Processing epoch 01:  69%|██████▉   | 3337/4850 [15:59<07:15,  3.47it/s]

Epoch: 2, Loss: 2.3117499351501465


Processing epoch 01:  69%|██████▉   | 3338/4850 [15:59<07:13,  3.49it/s]

Epoch: 2, Loss: 2.515347957611084


Processing epoch 01:  69%|██████▉   | 3339/4850 [16:00<07:13,  3.48it/s]

Epoch: 2, Loss: 2.095177173614502


Processing epoch 01:  69%|██████▉   | 3340/4850 [16:00<07:12,  3.49it/s]

Epoch: 2, Loss: 2.495208740234375


Processing epoch 01:  69%|██████▉   | 3341/4850 [16:00<07:11,  3.49it/s]

Epoch: 2, Loss: 2.2349820137023926


Processing epoch 01:  69%|██████▉   | 3342/4850 [16:00<07:10,  3.50it/s]

Epoch: 2, Loss: 2.818301200866699


Processing epoch 01:  69%|██████▉   | 3343/4850 [16:01<07:11,  3.49it/s]

Epoch: 2, Loss: 2.7758114337921143


Processing epoch 01:  69%|██████▉   | 3344/4850 [16:01<07:11,  3.49it/s]

Epoch: 2, Loss: 2.547706127166748


Processing epoch 01:  69%|██████▉   | 3345/4850 [16:01<07:10,  3.49it/s]

Epoch: 2, Loss: 2.3435707092285156


Processing epoch 01:  69%|██████▉   | 3346/4850 [16:02<07:09,  3.50it/s]

Epoch: 2, Loss: 2.897300958633423


Processing epoch 01:  69%|██████▉   | 3347/4850 [16:02<07:07,  3.52it/s]

Epoch: 2, Loss: 2.5488569736480713


Processing epoch 01:  69%|██████▉   | 3348/4850 [16:02<07:06,  3.52it/s]

Epoch: 2, Loss: 2.2494382858276367


Processing epoch 01:  69%|██████▉   | 3349/4850 [16:02<07:07,  3.51it/s]

Epoch: 2, Loss: 2.5848610401153564


Processing epoch 01:  69%|██████▉   | 3350/4850 [16:03<07:04,  3.53it/s]

Epoch: 2, Loss: 3.4925050735473633


Processing epoch 01:  69%|██████▉   | 3351/4850 [16:03<07:04,  3.53it/s]

Epoch: 2, Loss: 2.650102138519287


Processing epoch 01:  69%|██████▉   | 3352/4850 [16:03<07:03,  3.54it/s]

Epoch: 2, Loss: 2.828436851501465


Processing epoch 01:  69%|██████▉   | 3353/4850 [16:03<07:03,  3.54it/s]

Epoch: 2, Loss: 3.165247917175293


Processing epoch 01:  69%|██████▉   | 3354/4850 [16:04<07:06,  3.51it/s]

Epoch: 2, Loss: 2.0684542655944824


Processing epoch 01:  69%|██████▉   | 3355/4850 [16:04<07:05,  3.51it/s]

Epoch: 2, Loss: 2.9206626415252686


Processing epoch 01:  69%|██████▉   | 3356/4850 [16:04<07:04,  3.52it/s]

Epoch: 2, Loss: 2.531006097793579


Processing epoch 01:  69%|██████▉   | 3357/4850 [16:05<07:08,  3.48it/s]

Epoch: 2, Loss: 1.9440304040908813


Processing epoch 01:  69%|██████▉   | 3358/4850 [16:05<07:08,  3.49it/s]

Epoch: 2, Loss: 2.703878402709961


Processing epoch 01:  69%|██████▉   | 3359/4850 [16:05<07:05,  3.51it/s]

Epoch: 2, Loss: 3.129896640777588


Processing epoch 01:  69%|██████▉   | 3360/4850 [16:05<07:03,  3.52it/s]

Epoch: 2, Loss: 2.596745252609253


Processing epoch 01:  69%|██████▉   | 3361/4850 [16:06<07:02,  3.53it/s]

Epoch: 2, Loss: 2.794180393218994


Processing epoch 01:  69%|██████▉   | 3362/4850 [16:06<07:00,  3.54it/s]

Epoch: 2, Loss: 2.6638455390930176


Processing epoch 01:  69%|██████▉   | 3363/4850 [16:06<07:00,  3.54it/s]

Epoch: 2, Loss: 2.7014665603637695


Processing epoch 01:  69%|██████▉   | 3364/4850 [16:07<07:00,  3.53it/s]

Epoch: 2, Loss: 2.1662654876708984


Processing epoch 01:  69%|██████▉   | 3365/4850 [16:07<07:05,  3.49it/s]

Epoch: 2, Loss: 2.8031005859375


Processing epoch 01:  69%|██████▉   | 3366/4850 [16:07<07:12,  3.43it/s]

Epoch: 2, Loss: 2.2992639541625977


Processing epoch 01:  69%|██████▉   | 3367/4850 [16:08<07:10,  3.45it/s]

Epoch: 2, Loss: 2.6756582260131836


Processing epoch 01:  69%|██████▉   | 3368/4850 [16:08<07:11,  3.43it/s]

Epoch: 2, Loss: 2.307394504547119


Processing epoch 01:  69%|██████▉   | 3369/4850 [16:08<07:06,  3.47it/s]

Epoch: 2, Loss: 2.4833054542541504


Processing epoch 01:  69%|██████▉   | 3370/4850 [16:08<07:08,  3.45it/s]

Epoch: 2, Loss: 2.618089199066162


Processing epoch 01:  70%|██████▉   | 3371/4850 [16:09<07:10,  3.44it/s]

Epoch: 2, Loss: 2.3592896461486816


Processing epoch 01:  70%|██████▉   | 3372/4850 [16:09<07:12,  3.42it/s]

Epoch: 2, Loss: 2.039703607559204


Processing epoch 01:  70%|██████▉   | 3373/4850 [16:09<07:12,  3.41it/s]

Epoch: 2, Loss: 2.7677576541900635


Processing epoch 01:  70%|██████▉   | 3374/4850 [16:10<07:16,  3.38it/s]

Epoch: 2, Loss: 2.69350528717041


Processing epoch 01:  70%|██████▉   | 3375/4850 [16:10<07:18,  3.36it/s]

Epoch: 2, Loss: 2.6311638355255127


Processing epoch 01:  70%|██████▉   | 3376/4850 [16:10<07:20,  3.35it/s]

Epoch: 2, Loss: 2.6919407844543457


Processing epoch 01:  70%|██████▉   | 3377/4850 [16:10<07:22,  3.33it/s]

Epoch: 2, Loss: 2.56219744682312


Processing epoch 01:  70%|██████▉   | 3378/4850 [16:11<07:16,  3.37it/s]

Epoch: 2, Loss: 2.2924907207489014


Processing epoch 01:  70%|██████▉   | 3379/4850 [16:11<07:09,  3.43it/s]

Epoch: 2, Loss: 3.290998935699463


Processing epoch 01:  70%|██████▉   | 3380/4850 [16:11<07:12,  3.40it/s]

Epoch: 2, Loss: 2.2400591373443604


Processing epoch 01:  70%|██████▉   | 3381/4850 [16:12<07:16,  3.37it/s]

Epoch: 2, Loss: 2.838728666305542


Processing epoch 01:  70%|██████▉   | 3382/4850 [16:12<07:09,  3.42it/s]

Epoch: 2, Loss: 2.4843990802764893


Processing epoch 01:  70%|██████▉   | 3383/4850 [16:12<07:08,  3.42it/s]

Epoch: 2, Loss: 3.175205945968628


Processing epoch 01:  70%|██████▉   | 3384/4850 [16:12<07:06,  3.44it/s]

Epoch: 2, Loss: 2.6381936073303223


Processing epoch 01:  70%|██████▉   | 3385/4850 [16:13<07:04,  3.45it/s]

Epoch: 2, Loss: 2.6796367168426514


Processing epoch 01:  70%|██████▉   | 3386/4850 [16:13<07:02,  3.47it/s]

Epoch: 2, Loss: 2.9128241539001465


Processing epoch 01:  70%|██████▉   | 3387/4850 [16:13<07:01,  3.47it/s]

Epoch: 2, Loss: 2.254969596862793


Processing epoch 01:  70%|██████▉   | 3388/4850 [16:14<07:03,  3.45it/s]

Epoch: 2, Loss: 2.562241554260254


Processing epoch 01:  70%|██████▉   | 3389/4850 [16:14<07:03,  3.45it/s]

Epoch: 2, Loss: 2.8342032432556152


Processing epoch 01:  70%|██████▉   | 3390/4850 [16:14<07:01,  3.46it/s]

Epoch: 2, Loss: 2.4368104934692383


Processing epoch 01:  70%|██████▉   | 3391/4850 [16:15<07:00,  3.47it/s]

Epoch: 2, Loss: 2.797029972076416


Processing epoch 01:  70%|██████▉   | 3392/4850 [16:15<06:56,  3.50it/s]

Epoch: 2, Loss: 2.0742993354797363


Processing epoch 01:  70%|██████▉   | 3393/4850 [16:15<06:54,  3.51it/s]

Epoch: 2, Loss: 2.218876838684082


Processing epoch 01:  70%|██████▉   | 3394/4850 [16:15<07:00,  3.46it/s]

Epoch: 2, Loss: 2.5307164192199707


Processing epoch 01:  70%|███████   | 3395/4850 [16:16<06:56,  3.49it/s]

Epoch: 2, Loss: 2.7355127334594727


Processing epoch 01:  70%|███████   | 3396/4850 [16:16<06:54,  3.51it/s]

Epoch: 2, Loss: 2.8965916633605957


Processing epoch 01:  70%|███████   | 3397/4850 [16:16<06:54,  3.51it/s]

Epoch: 2, Loss: 2.3272831439971924


Processing epoch 01:  70%|███████   | 3398/4850 [16:17<06:54,  3.50it/s]

Epoch: 2, Loss: 2.0206542015075684


Processing epoch 01:  70%|███████   | 3399/4850 [16:17<06:51,  3.53it/s]

Epoch: 2, Loss: 2.6177144050598145


Processing epoch 01:  70%|███████   | 3400/4850 [16:17<06:49,  3.54it/s]

Epoch: 2, Loss: 2.725008964538574


Processing epoch 01:  70%|███████   | 3401/4850 [16:17<06:53,  3.50it/s]

Epoch: 2, Loss: 2.183526039123535


Processing epoch 01:  70%|███████   | 3402/4850 [16:18<06:51,  3.52it/s]

Epoch: 2, Loss: 2.7323546409606934


Processing epoch 01:  70%|███████   | 3403/4850 [16:18<06:49,  3.53it/s]

Epoch: 2, Loss: 2.830657482147217


Processing epoch 01:  70%|███████   | 3404/4850 [16:18<06:51,  3.52it/s]

Epoch: 2, Loss: 2.3477678298950195


Processing epoch 01:  70%|███████   | 3405/4850 [16:19<06:57,  3.46it/s]

Epoch: 2, Loss: 2.7836689949035645


Processing epoch 01:  70%|███████   | 3406/4850 [16:19<06:54,  3.48it/s]

Epoch: 2, Loss: 2.2843525409698486


Processing epoch 01:  70%|███████   | 3407/4850 [16:19<06:53,  3.49it/s]

Epoch: 2, Loss: 2.718496799468994


Processing epoch 01:  70%|███████   | 3408/4850 [16:19<06:50,  3.51it/s]

Epoch: 2, Loss: 2.1364996433258057


Processing epoch 01:  70%|███████   | 3409/4850 [16:20<06:50,  3.51it/s]

Epoch: 2, Loss: 2.306962490081787


Processing epoch 01:  70%|███████   | 3410/4850 [16:20<06:51,  3.50it/s]

Epoch: 2, Loss: 2.548522710800171


Processing epoch 01:  70%|███████   | 3411/4850 [16:20<06:51,  3.50it/s]

Epoch: 2, Loss: 2.5317513942718506


Processing epoch 01:  70%|███████   | 3412/4850 [16:20<06:50,  3.50it/s]

Epoch: 2, Loss: 2.7772483825683594


Processing epoch 01:  70%|███████   | 3413/4850 [16:21<06:51,  3.50it/s]

Epoch: 2, Loss: 2.688809394836426


Processing epoch 01:  70%|███████   | 3414/4850 [16:21<06:47,  3.52it/s]

Epoch: 2, Loss: 2.6410346031188965


Processing epoch 01:  70%|███████   | 3415/4850 [16:21<06:47,  3.52it/s]

Epoch: 2, Loss: 2.682849407196045


Processing epoch 01:  70%|███████   | 3416/4850 [16:22<06:51,  3.48it/s]

Epoch: 2, Loss: 2.4508442878723145


Processing epoch 01:  70%|███████   | 3417/4850 [16:22<06:57,  3.43it/s]

Epoch: 2, Loss: 2.730376720428467


Processing epoch 01:  70%|███████   | 3418/4850 [16:22<07:04,  3.38it/s]

Epoch: 2, Loss: 2.531010150909424


Processing epoch 01:  70%|███████   | 3419/4850 [16:23<07:03,  3.38it/s]

Epoch: 2, Loss: 2.179699182510376


Processing epoch 01:  71%|███████   | 3420/4850 [16:23<07:01,  3.39it/s]

Epoch: 2, Loss: 2.302889347076416


Processing epoch 01:  71%|███████   | 3421/4850 [16:23<07:02,  3.38it/s]

Epoch: 2, Loss: 2.3373541831970215


Processing epoch 01:  71%|███████   | 3422/4850 [16:23<06:55,  3.44it/s]

Epoch: 2, Loss: 2.624018669128418


Processing epoch 01:  71%|███████   | 3423/4850 [16:24<06:57,  3.42it/s]

Epoch: 2, Loss: 2.5172712802886963


Processing epoch 01:  71%|███████   | 3424/4850 [16:24<07:00,  3.39it/s]

Epoch: 2, Loss: 2.4557154178619385


Processing epoch 01:  71%|███████   | 3425/4850 [16:24<07:03,  3.37it/s]

Epoch: 2, Loss: 2.527250289916992


Processing epoch 01:  71%|███████   | 3426/4850 [16:25<07:04,  3.36it/s]

Epoch: 2, Loss: 2.743407726287842


Processing epoch 01:  71%|███████   | 3427/4850 [16:25<07:05,  3.34it/s]

Epoch: 2, Loss: 2.4915215969085693


Processing epoch 01:  71%|███████   | 3428/4850 [16:25<07:06,  3.34it/s]

Epoch: 2, Loss: 2.5558905601501465


Processing epoch 01:  71%|███████   | 3429/4850 [16:26<06:59,  3.38it/s]

Epoch: 2, Loss: 2.4244625568389893


Processing epoch 01:  71%|███████   | 3430/4850 [16:26<06:58,  3.39it/s]

Epoch: 2, Loss: 2.651580810546875


Processing epoch 01:  71%|███████   | 3431/4850 [16:26<06:57,  3.40it/s]

Epoch: 2, Loss: 2.6703004837036133


Processing epoch 01:  71%|███████   | 3432/4850 [16:26<06:56,  3.41it/s]

Epoch: 2, Loss: 2.8882675170898438


Processing epoch 01:  71%|███████   | 3433/4850 [16:27<06:59,  3.38it/s]

Epoch: 2, Loss: 2.388706684112549


Processing epoch 01:  71%|███████   | 3434/4850 [16:27<06:53,  3.42it/s]

Epoch: 2, Loss: 2.1193926334381104


Processing epoch 01:  71%|███████   | 3435/4850 [16:27<06:49,  3.46it/s]

Epoch: 2, Loss: 2.2439653873443604


Processing epoch 01:  71%|███████   | 3436/4850 [16:28<06:48,  3.46it/s]

Epoch: 2, Loss: 2.4011106491088867


Processing epoch 01:  71%|███████   | 3437/4850 [16:28<06:45,  3.48it/s]

Epoch: 2, Loss: 2.4084198474884033


Processing epoch 01:  71%|███████   | 3438/4850 [16:28<06:44,  3.49it/s]

Epoch: 2, Loss: 2.626668930053711


Processing epoch 01:  71%|███████   | 3439/4850 [16:28<06:42,  3.50it/s]

Epoch: 2, Loss: 2.5084095001220703


Processing epoch 01:  71%|███████   | 3440/4850 [16:29<06:41,  3.51it/s]

Epoch: 2, Loss: 2.349388360977173


Processing epoch 01:  71%|███████   | 3441/4850 [16:29<06:43,  3.49it/s]

Epoch: 2, Loss: 2.70479679107666


Processing epoch 01:  71%|███████   | 3442/4850 [16:29<06:42,  3.50it/s]

Epoch: 2, Loss: 2.3635363578796387


Processing epoch 01:  71%|███████   | 3443/4850 [16:30<06:40,  3.52it/s]

Epoch: 2, Loss: 2.4477081298828125


Processing epoch 01:  71%|███████   | 3444/4850 [16:30<06:40,  3.51it/s]

Epoch: 2, Loss: 2.2458248138427734


Processing epoch 01:  71%|███████   | 3445/4850 [16:30<06:38,  3.53it/s]

Epoch: 2, Loss: 2.458405017852783


Processing epoch 01:  71%|███████   | 3446/4850 [16:30<06:39,  3.51it/s]

Epoch: 2, Loss: 2.110396146774292


Processing epoch 01:  71%|███████   | 3447/4850 [16:31<06:40,  3.51it/s]

Epoch: 2, Loss: 2.611452579498291


Processing epoch 01:  71%|███████   | 3448/4850 [16:31<06:36,  3.53it/s]

Epoch: 2, Loss: 3.5601205825805664


Processing epoch 01:  71%|███████   | 3449/4850 [16:31<06:37,  3.52it/s]

Epoch: 2, Loss: 2.713305950164795


Processing epoch 01:  71%|███████   | 3450/4850 [16:32<06:37,  3.52it/s]

Epoch: 2, Loss: 1.8332620859146118


Processing epoch 01:  71%|███████   | 3451/4850 [16:32<06:37,  3.52it/s]

Epoch: 2, Loss: 2.545520782470703


Processing epoch 01:  71%|███████   | 3452/4850 [16:32<06:42,  3.47it/s]

Epoch: 2, Loss: 2.422523021697998


Processing epoch 01:  71%|███████   | 3453/4850 [16:32<06:37,  3.51it/s]

Epoch: 2, Loss: 3.659651041030884


Processing epoch 01:  71%|███████   | 3454/4850 [16:33<06:35,  3.53it/s]

Epoch: 2, Loss: 2.0602550506591797


Processing epoch 01:  71%|███████   | 3455/4850 [16:33<06:34,  3.54it/s]

Epoch: 2, Loss: 2.435170888900757


Processing epoch 01:  71%|███████▏  | 3456/4850 [16:33<06:36,  3.52it/s]

Epoch: 2, Loss: 2.4824013710021973


Processing epoch 01:  71%|███████▏  | 3457/4850 [16:34<06:36,  3.51it/s]

Epoch: 2, Loss: 2.2186341285705566


Processing epoch 01:  71%|███████▏  | 3458/4850 [16:34<06:37,  3.51it/s]

Epoch: 2, Loss: 2.8948960304260254


Processing epoch 01:  71%|███████▏  | 3459/4850 [16:34<06:34,  3.52it/s]

Epoch: 2, Loss: 2.5865776538848877


Processing epoch 01:  71%|███████▏  | 3460/4850 [16:34<06:34,  3.52it/s]

Epoch: 2, Loss: 2.6693053245544434


Processing epoch 01:  71%|███████▏  | 3461/4850 [16:35<06:34,  3.52it/s]

Epoch: 2, Loss: 2.922396183013916


Processing epoch 01:  71%|███████▏  | 3462/4850 [16:35<06:36,  3.51it/s]

Epoch: 2, Loss: 3.3301117420196533


Processing epoch 01:  71%|███████▏  | 3463/4850 [16:35<06:38,  3.48it/s]

Epoch: 2, Loss: 2.609623908996582


Processing epoch 01:  71%|███████▏  | 3464/4850 [16:36<06:35,  3.50it/s]

Epoch: 2, Loss: 2.4568934440612793


Processing epoch 01:  71%|███████▏  | 3465/4850 [16:36<06:34,  3.51it/s]

Epoch: 2, Loss: 2.1569926738739014


Processing epoch 01:  71%|███████▏  | 3466/4850 [16:36<06:33,  3.52it/s]

Epoch: 2, Loss: 2.631138324737549


Processing epoch 01:  71%|███████▏  | 3467/4850 [16:36<06:33,  3.52it/s]

Epoch: 2, Loss: 2.5024075508117676


Processing epoch 01:  72%|███████▏  | 3468/4850 [16:37<06:32,  3.52it/s]

Epoch: 2, Loss: 1.7627311944961548


Processing epoch 01:  72%|███████▏  | 3469/4850 [16:37<06:34,  3.50it/s]

Epoch: 2, Loss: 2.491882801055908


Processing epoch 01:  72%|███████▏  | 3470/4850 [16:37<06:34,  3.50it/s]

Epoch: 2, Loss: 2.3233227729797363


Processing epoch 01:  72%|███████▏  | 3471/4850 [16:38<06:39,  3.45it/s]

Epoch: 2, Loss: 2.5683469772338867


Processing epoch 01:  72%|███████▏  | 3472/4850 [16:38<06:45,  3.40it/s]

Epoch: 2, Loss: 2.339219093322754


Processing epoch 01:  72%|███████▏  | 3473/4850 [16:38<06:38,  3.46it/s]

Epoch: 2, Loss: 2.6773154735565186


Processing epoch 01:  72%|███████▏  | 3474/4850 [16:38<06:43,  3.41it/s]

Epoch: 2, Loss: 2.460486650466919


Processing epoch 01:  72%|███████▏  | 3475/4850 [16:39<06:50,  3.35it/s]

Epoch: 2, Loss: 2.5935747623443604


Processing epoch 01:  72%|███████▏  | 3476/4850 [16:39<06:47,  3.38it/s]

Epoch: 2, Loss: 2.1365203857421875


Processing epoch 01:  72%|███████▏  | 3477/4850 [16:39<06:41,  3.42it/s]

Epoch: 2, Loss: 2.5789051055908203


Processing epoch 01:  72%|███████▏  | 3478/4850 [16:40<06:44,  3.39it/s]

Epoch: 2, Loss: 2.421792507171631


Processing epoch 01:  72%|███████▏  | 3479/4850 [16:40<06:39,  3.44it/s]

Epoch: 2, Loss: 3.1552560329437256


Processing epoch 01:  72%|███████▏  | 3480/4850 [16:40<06:37,  3.45it/s]

Epoch: 2, Loss: 2.598723888397217


Processing epoch 01:  72%|███████▏  | 3481/4850 [16:40<06:38,  3.44it/s]

Epoch: 2, Loss: 2.1457252502441406


Processing epoch 01:  72%|███████▏  | 3482/4850 [16:41<06:36,  3.45it/s]

Epoch: 2, Loss: 2.5506997108459473


Processing epoch 01:  72%|███████▏  | 3483/4850 [16:41<06:46,  3.37it/s]

Epoch: 2, Loss: 2.4918031692504883


Processing epoch 01:  72%|███████▏  | 3484/4850 [16:41<06:55,  3.29it/s]

Epoch: 2, Loss: 2.4787726402282715


Processing epoch 01:  72%|███████▏  | 3485/4850 [16:42<06:48,  3.34it/s]

Epoch: 2, Loss: 2.245131254196167


Processing epoch 01:  72%|███████▏  | 3486/4850 [16:42<06:41,  3.39it/s]

Epoch: 2, Loss: 2.5182766914367676


Processing epoch 01:  72%|███████▏  | 3487/4850 [16:42<06:37,  3.43it/s]

Epoch: 2, Loss: 3.324812889099121


Processing epoch 01:  72%|███████▏  | 3488/4850 [16:43<06:34,  3.46it/s]

Epoch: 2, Loss: 2.3581273555755615


Processing epoch 01:  72%|███████▏  | 3489/4850 [16:43<06:32,  3.47it/s]

Epoch: 2, Loss: 1.9899542331695557


Processing epoch 01:  72%|███████▏  | 3490/4850 [16:43<06:30,  3.49it/s]

Epoch: 2, Loss: 2.44260835647583


Processing epoch 01:  72%|███████▏  | 3491/4850 [16:43<06:26,  3.52it/s]

Epoch: 2, Loss: 3.363086223602295


Processing epoch 01:  72%|███████▏  | 3492/4850 [16:44<06:32,  3.46it/s]

Epoch: 2, Loss: 4.149026870727539


Processing epoch 01:  72%|███████▏  | 3493/4850 [16:44<06:32,  3.46it/s]

Epoch: 2, Loss: 2.4099886417388916


Processing epoch 01:  72%|███████▏  | 3494/4850 [16:44<06:31,  3.47it/s]

Epoch: 2, Loss: 2.3020551204681396


Processing epoch 01:  72%|███████▏  | 3495/4850 [16:45<06:26,  3.50it/s]

Epoch: 2, Loss: 2.2553648948669434


Processing epoch 01:  72%|███████▏  | 3496/4850 [16:45<06:26,  3.50it/s]

Epoch: 2, Loss: 2.2263901233673096


Processing epoch 01:  72%|███████▏  | 3497/4850 [16:45<06:23,  3.53it/s]

Epoch: 2, Loss: 3.809624195098877


Processing epoch 01:  72%|███████▏  | 3498/4850 [16:45<06:23,  3.52it/s]

Epoch: 2, Loss: 2.290842056274414


Processing epoch 01:  72%|███████▏  | 3499/4850 [16:46<06:19,  3.56it/s]

Epoch: 2, Loss: 2.4630250930786133


Processing epoch 01:  72%|███████▏  | 3500/4850 [16:46<06:23,  3.52it/s]

Epoch: 2, Loss: 2.935340404510498


Processing epoch 01:  72%|███████▏  | 3501/4850 [16:46<06:21,  3.54it/s]

Epoch: 2, Loss: 3.285003423690796


Processing epoch 01:  72%|███████▏  | 3502/4850 [16:46<06:20,  3.54it/s]

Epoch: 2, Loss: 3.0464513301849365


Processing epoch 01:  72%|███████▏  | 3503/4850 [16:47<06:25,  3.49it/s]

Epoch: 2, Loss: 2.285362720489502


Processing epoch 01:  72%|███████▏  | 3504/4850 [16:47<06:21,  3.53it/s]

Epoch: 2, Loss: 4.029035568237305


Processing epoch 01:  72%|███████▏  | 3505/4850 [16:47<06:21,  3.53it/s]

Epoch: 2, Loss: 2.6919713020324707


Processing epoch 01:  72%|███████▏  | 3506/4850 [16:48<06:20,  3.53it/s]

Epoch: 2, Loss: 3.0819478034973145


Processing epoch 01:  72%|███████▏  | 3507/4850 [16:48<06:22,  3.51it/s]

Epoch: 2, Loss: 2.55771541595459


Processing epoch 01:  72%|███████▏  | 3508/4850 [16:48<06:23,  3.50it/s]

Epoch: 2, Loss: 2.244229316711426


Processing epoch 01:  72%|███████▏  | 3509/4850 [16:48<06:22,  3.51it/s]

Epoch: 2, Loss: 2.306288003921509


Processing epoch 01:  72%|███████▏  | 3510/4850 [16:49<06:22,  3.50it/s]

Epoch: 2, Loss: 2.847733974456787


Processing epoch 01:  72%|███████▏  | 3511/4850 [16:49<06:22,  3.50it/s]

Epoch: 2, Loss: 2.100560426712036


Processing epoch 01:  72%|███████▏  | 3512/4850 [16:49<06:20,  3.51it/s]

Epoch: 2, Loss: 2.170330762863159


Processing epoch 01:  72%|███████▏  | 3513/4850 [16:50<06:19,  3.52it/s]

Epoch: 2, Loss: 2.568014144897461


Processing epoch 01:  72%|███████▏  | 3514/4850 [16:50<06:21,  3.50it/s]

Epoch: 2, Loss: 2.270449161529541


Processing epoch 01:  72%|███████▏  | 3515/4850 [16:50<06:21,  3.50it/s]

Epoch: 2, Loss: 2.3901150226593018


Processing epoch 01:  72%|███████▏  | 3516/4850 [16:50<06:20,  3.50it/s]

Epoch: 2, Loss: 2.116086483001709


Processing epoch 01:  73%|███████▎  | 3517/4850 [16:51<06:20,  3.50it/s]

Epoch: 2, Loss: 2.148071765899658


Processing epoch 01:  73%|███████▎  | 3518/4850 [16:51<06:19,  3.51it/s]

Epoch: 2, Loss: 2.730736017227173


Processing epoch 01:  73%|███████▎  | 3519/4850 [16:51<06:19,  3.51it/s]

Epoch: 2, Loss: 2.6149330139160156


Processing epoch 01:  73%|███████▎  | 3520/4850 [16:52<06:22,  3.48it/s]

Epoch: 2, Loss: 2.9004716873168945


Processing epoch 01:  73%|███████▎  | 3521/4850 [16:52<06:22,  3.48it/s]

Epoch: 2, Loss: 2.6279876232147217


Processing epoch 01:  73%|███████▎  | 3522/4850 [16:52<06:28,  3.41it/s]

Epoch: 2, Loss: 2.622910261154175


Processing epoch 01:  73%|███████▎  | 3523/4850 [16:53<06:27,  3.43it/s]

Epoch: 2, Loss: 2.6189868450164795


Processing epoch 01:  73%|███████▎  | 3524/4850 [16:53<06:24,  3.45it/s]

Epoch: 2, Loss: 2.668680191040039


Processing epoch 01:  73%|███████▎  | 3525/4850 [16:53<06:25,  3.44it/s]

Epoch: 2, Loss: 2.5094428062438965


Processing epoch 01:  73%|███████▎  | 3526/4850 [16:53<06:25,  3.44it/s]

Epoch: 2, Loss: 2.0097038745880127


Processing epoch 01:  73%|███████▎  | 3527/4850 [16:54<06:20,  3.48it/s]

Epoch: 2, Loss: 2.367927074432373


Processing epoch 01:  73%|███████▎  | 3528/4850 [16:54<06:32,  3.37it/s]

Epoch: 2, Loss: 2.035858392715454


Processing epoch 01:  73%|███████▎  | 3529/4850 [16:54<06:35,  3.34it/s]

Epoch: 2, Loss: 2.924086093902588


Processing epoch 01:  73%|███████▎  | 3530/4850 [16:55<06:34,  3.35it/s]

Epoch: 2, Loss: 2.540971279144287


Processing epoch 01:  73%|███████▎  | 3531/4850 [16:55<06:30,  3.38it/s]

Epoch: 2, Loss: 2.4837732315063477


Processing epoch 01:  73%|███████▎  | 3532/4850 [16:55<06:31,  3.37it/s]

Epoch: 2, Loss: 2.497283697128296


Processing epoch 01:  73%|███████▎  | 3533/4850 [16:55<06:24,  3.43it/s]

Epoch: 2, Loss: 2.118201732635498


Processing epoch 01:  73%|███████▎  | 3534/4850 [16:56<06:23,  3.43it/s]

Epoch: 2, Loss: 2.4339096546173096


Processing epoch 01:  73%|███████▎  | 3535/4850 [16:56<06:23,  3.43it/s]

Epoch: 2, Loss: 2.656524181365967


Processing epoch 01:  73%|███████▎  | 3536/4850 [16:56<06:27,  3.39it/s]

Epoch: 2, Loss: 2.4329304695129395


Processing epoch 01:  73%|███████▎  | 3537/4850 [16:57<06:23,  3.43it/s]

Epoch: 2, Loss: 2.198517322540283


Processing epoch 01:  73%|███████▎  | 3538/4850 [16:57<06:18,  3.47it/s]

Epoch: 2, Loss: 3.453761100769043


Processing epoch 01:  73%|███████▎  | 3539/4850 [16:57<06:17,  3.48it/s]

Epoch: 2, Loss: 2.805248498916626


Processing epoch 01:  73%|███████▎  | 3540/4850 [16:57<06:13,  3.50it/s]

Epoch: 2, Loss: 2.8590879440307617


Processing epoch 01:  73%|███████▎  | 3541/4850 [16:58<06:12,  3.51it/s]

Epoch: 2, Loss: 2.6583192348480225


Processing epoch 01:  73%|███████▎  | 3542/4850 [16:58<06:10,  3.53it/s]

Epoch: 2, Loss: 2.300527572631836


Processing epoch 01:  73%|███████▎  | 3543/4850 [16:58<06:13,  3.50it/s]

Epoch: 2, Loss: 2.775916576385498


Processing epoch 01:  73%|███████▎  | 3544/4850 [16:59<06:10,  3.52it/s]

Epoch: 2, Loss: 2.6345906257629395


Processing epoch 01:  73%|███████▎  | 3545/4850 [16:59<06:10,  3.52it/s]

Epoch: 2, Loss: 2.5882086753845215


Processing epoch 01:  73%|███████▎  | 3546/4850 [16:59<06:15,  3.48it/s]

Epoch: 2, Loss: 2.252331256866455


Processing epoch 01:  73%|███████▎  | 3547/4850 [16:59<06:14,  3.48it/s]

Epoch: 2, Loss: 2.0884275436401367


Processing epoch 01:  73%|███████▎  | 3548/4850 [17:00<06:16,  3.45it/s]

Epoch: 2, Loss: 2.398435115814209


Processing epoch 01:  73%|███████▎  | 3549/4850 [17:00<06:15,  3.47it/s]

Epoch: 2, Loss: 2.4281463623046875


Processing epoch 01:  73%|███████▎  | 3550/4850 [17:00<06:13,  3.48it/s]

Epoch: 2, Loss: 2.4807674884796143


Processing epoch 01:  73%|███████▎  | 3551/4850 [17:01<06:12,  3.49it/s]

Epoch: 2, Loss: 2.1718907356262207


Processing epoch 01:  73%|███████▎  | 3552/4850 [17:01<06:12,  3.48it/s]

Epoch: 2, Loss: 2.3276214599609375


Processing epoch 01:  73%|███████▎  | 3553/4850 [17:01<06:12,  3.48it/s]

Epoch: 2, Loss: 2.9189136028289795


Processing epoch 01:  73%|███████▎  | 3554/4850 [17:02<06:19,  3.42it/s]

Epoch: 2, Loss: 2.67604398727417


Processing epoch 01:  73%|███████▎  | 3555/4850 [17:02<06:15,  3.45it/s]

Epoch: 2, Loss: 2.2583534717559814


Processing epoch 01:  73%|███████▎  | 3556/4850 [17:02<06:13,  3.46it/s]

Epoch: 2, Loss: 2.4289798736572266


Processing epoch 01:  73%|███████▎  | 3557/4850 [17:02<06:13,  3.46it/s]

Epoch: 2, Loss: 2.575129985809326


Processing epoch 01:  73%|███████▎  | 3558/4850 [17:03<06:12,  3.47it/s]

Epoch: 2, Loss: 2.476134777069092


Processing epoch 01:  73%|███████▎  | 3559/4850 [17:03<06:11,  3.48it/s]

Epoch: 2, Loss: 2.1583220958709717


Processing epoch 01:  73%|███████▎  | 3560/4850 [17:03<06:09,  3.49it/s]

Epoch: 2, Loss: 2.5419673919677734


Processing epoch 01:  73%|███████▎  | 3561/4850 [17:04<06:08,  3.50it/s]

Epoch: 2, Loss: 3.365145206451416


Processing epoch 01:  73%|███████▎  | 3562/4850 [17:04<06:06,  3.51it/s]

Epoch: 2, Loss: 2.137563943862915


Processing epoch 01:  73%|███████▎  | 3563/4850 [17:04<06:05,  3.52it/s]

Epoch: 2, Loss: 2.519207239151001


Processing epoch 01:  73%|███████▎  | 3564/4850 [17:04<06:04,  3.52it/s]

Epoch: 2, Loss: 2.5521957874298096


Processing epoch 01:  74%|███████▎  | 3565/4850 [17:05<06:11,  3.46it/s]

Epoch: 2, Loss: 2.074845314025879


Processing epoch 01:  74%|███████▎  | 3566/4850 [17:05<06:10,  3.47it/s]

Epoch: 2, Loss: 2.6071062088012695


Processing epoch 01:  74%|███████▎  | 3567/4850 [17:05<06:09,  3.48it/s]

Epoch: 2, Loss: 2.7023138999938965


Processing epoch 01:  74%|███████▎  | 3568/4850 [17:06<06:08,  3.48it/s]

Epoch: 2, Loss: 2.24595308303833


Processing epoch 01:  74%|███████▎  | 3569/4850 [17:06<06:05,  3.51it/s]

Epoch: 2, Loss: 3.1403541564941406


Processing epoch 01:  74%|███████▎  | 3570/4850 [17:06<06:05,  3.50it/s]

Epoch: 2, Loss: 2.571507453918457


Processing epoch 01:  74%|███████▎  | 3571/4850 [17:06<06:03,  3.52it/s]

Epoch: 2, Loss: 2.755279302597046


Processing epoch 01:  74%|███████▎  | 3572/4850 [17:07<06:05,  3.50it/s]

Epoch: 2, Loss: 2.285926580429077


Processing epoch 01:  74%|███████▎  | 3573/4850 [17:07<06:09,  3.46it/s]

Epoch: 2, Loss: 3.0492804050445557


Processing epoch 01:  74%|███████▎  | 3574/4850 [17:07<06:11,  3.44it/s]

Epoch: 2, Loss: 2.253666639328003


Processing epoch 01:  74%|███████▎  | 3575/4850 [17:08<06:13,  3.41it/s]

Epoch: 2, Loss: 2.7911128997802734


Processing epoch 01:  74%|███████▎  | 3576/4850 [17:08<06:18,  3.37it/s]

Epoch: 2, Loss: 2.688277244567871


Processing epoch 01:  74%|███████▍  | 3577/4850 [17:08<06:16,  3.38it/s]

Epoch: 2, Loss: 1.7292063236236572


Processing epoch 01:  74%|███████▍  | 3578/4850 [17:08<06:13,  3.41it/s]

Epoch: 2, Loss: 2.355160713195801


Processing epoch 01:  74%|███████▍  | 3579/4850 [17:09<06:17,  3.37it/s]

Epoch: 2, Loss: 2.271413803100586


Processing epoch 01:  74%|███████▍  | 3580/4850 [17:09<06:16,  3.37it/s]

Epoch: 2, Loss: 2.6172404289245605


Processing epoch 01:  74%|███████▍  | 3581/4850 [17:09<06:15,  3.38it/s]

Epoch: 2, Loss: 2.6526267528533936


Processing epoch 01:  74%|███████▍  | 3582/4850 [17:10<06:13,  3.40it/s]

Epoch: 2, Loss: 2.0378201007843018


Processing epoch 01:  74%|███████▍  | 3583/4850 [17:10<06:18,  3.35it/s]

Epoch: 2, Loss: 2.6147732734680176


Processing epoch 01:  74%|███████▍  | 3584/4850 [17:10<06:22,  3.31it/s]

Epoch: 2, Loss: 2.350064516067505


Processing epoch 01:  74%|███████▍  | 3585/4850 [17:11<06:22,  3.31it/s]

Epoch: 2, Loss: 2.205595016479492


Processing epoch 01:  74%|███████▍  | 3586/4850 [17:11<06:13,  3.38it/s]

Epoch: 2, Loss: 2.849628448486328


Processing epoch 01:  74%|███████▍  | 3587/4850 [17:11<06:16,  3.36it/s]

Epoch: 2, Loss: 2.5889172554016113


Processing epoch 01:  74%|███████▍  | 3588/4850 [17:11<06:17,  3.34it/s]

Epoch: 2, Loss: 2.440260410308838


Processing epoch 01:  74%|███████▍  | 3589/4850 [17:12<06:11,  3.39it/s]

Epoch: 2, Loss: 2.435643434524536


Processing epoch 01:  74%|███████▍  | 3590/4850 [17:12<06:09,  3.41it/s]

Epoch: 2, Loss: 2.339547634124756


Processing epoch 01:  74%|███████▍  | 3591/4850 [17:12<06:12,  3.38it/s]

Epoch: 2, Loss: 2.9613196849823


Processing epoch 01:  74%|███████▍  | 3592/4850 [17:13<06:09,  3.41it/s]

Epoch: 2, Loss: 2.4404096603393555


Processing epoch 01:  74%|███████▍  | 3593/4850 [17:13<06:04,  3.45it/s]

Epoch: 2, Loss: 2.3050060272216797


Processing epoch 01:  74%|███████▍  | 3594/4850 [17:13<06:08,  3.41it/s]

Epoch: 2, Loss: 2.8997673988342285


Processing epoch 01:  74%|███████▍  | 3595/4850 [17:13<06:03,  3.45it/s]

Epoch: 2, Loss: 3.1661252975463867


Processing epoch 01:  74%|███████▍  | 3596/4850 [17:14<06:01,  3.47it/s]

Epoch: 2, Loss: 3.5689823627471924


Processing epoch 01:  74%|███████▍  | 3597/4850 [17:14<06:00,  3.47it/s]

Epoch: 2, Loss: 2.3319904804229736


Processing epoch 01:  74%|███████▍  | 3598/4850 [17:14<05:59,  3.48it/s]

Epoch: 2, Loss: 2.518904685974121


Processing epoch 01:  74%|███████▍  | 3599/4850 [17:15<05:57,  3.50it/s]

Epoch: 2, Loss: 2.2801923751831055


Processing epoch 01:  74%|███████▍  | 3600/4850 [17:15<05:56,  3.51it/s]

Epoch: 2, Loss: 1.7369954586029053


Processing epoch 01:  74%|███████▍  | 3601/4850 [17:15<05:57,  3.50it/s]

Epoch: 2, Loss: 2.6277332305908203


Processing epoch 01:  74%|███████▍  | 3602/4850 [17:15<05:55,  3.51it/s]

Epoch: 2, Loss: 2.387094736099243


Processing epoch 01:  74%|███████▍  | 3603/4850 [17:16<05:54,  3.52it/s]

Epoch: 2, Loss: 2.457127094268799


Processing epoch 01:  74%|███████▍  | 3604/4850 [17:16<05:53,  3.52it/s]

Epoch: 2, Loss: 2.3141636848449707


Processing epoch 01:  74%|███████▍  | 3605/4850 [17:16<05:55,  3.51it/s]

Epoch: 2, Loss: 1.9585649967193604


Processing epoch 01:  74%|███████▍  | 3606/4850 [17:17<05:52,  3.53it/s]

Epoch: 2, Loss: 2.4713683128356934


Processing epoch 01:  74%|███████▍  | 3607/4850 [17:17<05:52,  3.53it/s]

Epoch: 2, Loss: 2.490115165710449


Processing epoch 01:  74%|███████▍  | 3608/4850 [17:17<05:53,  3.52it/s]

Epoch: 2, Loss: 2.2135305404663086


Processing epoch 01:  74%|███████▍  | 3609/4850 [17:17<05:52,  3.52it/s]

Epoch: 2, Loss: 3.051633358001709


Processing epoch 01:  74%|███████▍  | 3610/4850 [17:18<05:52,  3.52it/s]

Epoch: 2, Loss: 2.6815543174743652


Processing epoch 01:  74%|███████▍  | 3611/4850 [17:18<05:52,  3.51it/s]

Epoch: 2, Loss: 2.7614541053771973


Processing epoch 01:  74%|███████▍  | 3612/4850 [17:18<05:53,  3.50it/s]

Epoch: 2, Loss: 2.5527124404907227


Processing epoch 01:  74%|███████▍  | 3613/4850 [17:19<05:53,  3.50it/s]

Epoch: 2, Loss: 2.2062277793884277


Processing epoch 01:  75%|███████▍  | 3614/4850 [17:19<05:53,  3.50it/s]

Epoch: 2, Loss: 2.4370009899139404


Processing epoch 01:  75%|███████▍  | 3615/4850 [17:19<05:52,  3.50it/s]

Epoch: 2, Loss: 2.7741293907165527


Processing epoch 01:  75%|███████▍  | 3616/4850 [17:19<05:53,  3.49it/s]

Epoch: 2, Loss: 2.1316416263580322


Processing epoch 01:  75%|███████▍  | 3617/4850 [17:20<05:53,  3.49it/s]

Epoch: 2, Loss: 2.364612579345703


Processing epoch 01:  75%|███████▍  | 3618/4850 [17:20<05:54,  3.48it/s]

Epoch: 2, Loss: 2.473463535308838


Processing epoch 01:  75%|███████▍  | 3619/4850 [17:20<05:53,  3.48it/s]

Epoch: 2, Loss: 1.9404454231262207


Processing epoch 01:  75%|███████▍  | 3620/4850 [17:21<05:51,  3.50it/s]

Epoch: 2, Loss: 2.555914878845215


Processing epoch 01:  75%|███████▍  | 3621/4850 [17:21<05:51,  3.49it/s]

Epoch: 2, Loss: 3.00146484375


Processing epoch 01:  75%|███████▍  | 3622/4850 [17:21<05:49,  3.52it/s]

Epoch: 2, Loss: 2.4626550674438477


Processing epoch 01:  75%|███████▍  | 3623/4850 [17:21<05:54,  3.46it/s]

Epoch: 2, Loss: 2.4488635063171387


Processing epoch 01:  75%|███████▍  | 3624/4850 [17:22<05:57,  3.43it/s]

Epoch: 2, Loss: 2.63338565826416


Processing epoch 01:  75%|███████▍  | 3625/4850 [17:22<05:55,  3.45it/s]

Epoch: 2, Loss: 2.244966506958008


Processing epoch 01:  75%|███████▍  | 3626/4850 [17:22<05:52,  3.48it/s]

Epoch: 2, Loss: 2.0043392181396484


Processing epoch 01:  75%|███████▍  | 3627/4850 [17:23<05:49,  3.50it/s]

Epoch: 2, Loss: 2.8663220405578613


Processing epoch 01:  75%|███████▍  | 3628/4850 [17:23<05:52,  3.47it/s]

Epoch: 2, Loss: 2.2878425121307373


Processing epoch 01:  75%|███████▍  | 3629/4850 [17:23<05:49,  3.50it/s]

Epoch: 2, Loss: 2.6570987701416016


Processing epoch 01:  75%|███████▍  | 3630/4850 [17:23<05:47,  3.51it/s]

Epoch: 2, Loss: 2.190253734588623


Processing epoch 01:  75%|███████▍  | 3631/4850 [17:24<05:46,  3.52it/s]

Epoch: 2, Loss: 2.110788583755493


Processing epoch 01:  75%|███████▍  | 3632/4850 [17:24<05:51,  3.47it/s]

Epoch: 2, Loss: 2.941565990447998


Processing epoch 01:  75%|███████▍  | 3633/4850 [17:24<05:53,  3.44it/s]

Epoch: 2, Loss: 2.8487682342529297


Processing epoch 01:  75%|███████▍  | 3634/4850 [17:25<05:54,  3.43it/s]

Epoch: 2, Loss: 2.542288303375244


Processing epoch 01:  75%|███████▍  | 3635/4850 [17:25<05:59,  3.38it/s]

Epoch: 2, Loss: 2.6364736557006836


Processing epoch 01:  75%|███████▍  | 3636/4850 [17:25<06:00,  3.37it/s]

Epoch: 2, Loss: 3.0376439094543457


Processing epoch 01:  75%|███████▍  | 3637/4850 [17:26<05:55,  3.42it/s]

Epoch: 2, Loss: 2.326510429382324


Processing epoch 01:  75%|███████▌  | 3638/4850 [17:26<05:53,  3.43it/s]

Epoch: 2, Loss: 2.364321708679199


Processing epoch 01:  75%|███████▌  | 3639/4850 [17:26<05:57,  3.39it/s]

Epoch: 2, Loss: 2.300961971282959


Processing epoch 01:  75%|███████▌  | 3640/4850 [17:26<06:01,  3.35it/s]

Epoch: 2, Loss: 2.5689330101013184


Processing epoch 01:  75%|███████▌  | 3641/4850 [17:27<05:57,  3.38it/s]

Epoch: 2, Loss: 2.2391815185546875


Processing epoch 01:  75%|███████▌  | 3642/4850 [17:27<05:53,  3.42it/s]

Epoch: 2, Loss: 2.367328643798828


Processing epoch 01:  75%|███████▌  | 3643/4850 [17:27<05:52,  3.43it/s]

Epoch: 2, Loss: 2.100141763687134


Processing epoch 01:  75%|███████▌  | 3644/4850 [17:28<05:48,  3.46it/s]

Epoch: 2, Loss: 2.5203657150268555


Processing epoch 01:  75%|███████▌  | 3645/4850 [17:28<05:49,  3.45it/s]

Epoch: 2, Loss: 2.646594524383545


Processing epoch 01:  75%|███████▌  | 3646/4850 [17:28<05:46,  3.48it/s]

Epoch: 2, Loss: 3.103278160095215


Processing epoch 01:  75%|███████▌  | 3647/4850 [17:28<05:45,  3.48it/s]

Epoch: 2, Loss: 2.4588022232055664


Processing epoch 01:  75%|███████▌  | 3648/4850 [17:29<05:43,  3.50it/s]

Epoch: 2, Loss: 2.045684576034546


Processing epoch 01:  75%|███████▌  | 3649/4850 [17:29<05:44,  3.49it/s]

Epoch: 2, Loss: 2.589552879333496


Processing epoch 01:  75%|███████▌  | 3650/4850 [17:29<05:44,  3.49it/s]

Epoch: 2, Loss: 2.1263725757598877


Processing epoch 01:  75%|███████▌  | 3651/4850 [17:30<05:43,  3.49it/s]

Epoch: 2, Loss: 2.8754007816314697


Processing epoch 01:  75%|███████▌  | 3652/4850 [17:30<05:41,  3.51it/s]

Epoch: 2, Loss: 2.966769218444824


Processing epoch 01:  75%|███████▌  | 3653/4850 [17:30<05:39,  3.53it/s]

Epoch: 2, Loss: 2.8995771408081055


Processing epoch 01:  75%|███████▌  | 3654/4850 [17:30<05:40,  3.51it/s]

Epoch: 2, Loss: 2.2238144874572754


Processing epoch 01:  75%|███████▌  | 3655/4850 [17:31<05:39,  3.52it/s]

Epoch: 2, Loss: 2.818769931793213


Processing epoch 01:  75%|███████▌  | 3656/4850 [17:31<05:42,  3.49it/s]

Epoch: 2, Loss: 3.4110817909240723


Processing epoch 01:  75%|███████▌  | 3657/4850 [17:31<05:39,  3.51it/s]

Epoch: 2, Loss: 2.140535831451416


Processing epoch 01:  75%|███████▌  | 3658/4850 [17:32<05:39,  3.52it/s]

Epoch: 2, Loss: 2.835742473602295


Processing epoch 01:  75%|███████▌  | 3659/4850 [17:32<05:42,  3.47it/s]

Epoch: 2, Loss: 2.360647678375244


Processing epoch 01:  75%|███████▌  | 3660/4850 [17:32<05:41,  3.49it/s]

Epoch: 2, Loss: 2.4222121238708496


Processing epoch 01:  75%|███████▌  | 3661/4850 [17:32<05:40,  3.49it/s]

Epoch: 2, Loss: 2.669358253479004


Processing epoch 01:  76%|███████▌  | 3662/4850 [17:33<05:40,  3.49it/s]

Epoch: 2, Loss: 2.6721351146698


Processing epoch 01:  76%|███████▌  | 3663/4850 [17:33<05:40,  3.48it/s]

Epoch: 2, Loss: 2.6633262634277344


Processing epoch 01:  76%|███████▌  | 3664/4850 [17:33<05:38,  3.51it/s]

Epoch: 2, Loss: 2.634625196456909


Processing epoch 01:  76%|███████▌  | 3665/4850 [17:34<05:38,  3.50it/s]

Epoch: 2, Loss: 2.1325435638427734


Processing epoch 01:  76%|███████▌  | 3666/4850 [17:34<05:39,  3.48it/s]

Epoch: 2, Loss: 2.198657274246216


Processing epoch 01:  76%|███████▌  | 3667/4850 [17:34<05:42,  3.46it/s]

Epoch: 2, Loss: 2.6559622287750244


Processing epoch 01:  76%|███████▌  | 3668/4850 [17:34<05:38,  3.50it/s]

Epoch: 2, Loss: 3.559354305267334


Processing epoch 01:  76%|███████▌  | 3669/4850 [17:35<05:36,  3.51it/s]

Epoch: 2, Loss: 2.8258323669433594


Processing epoch 01:  76%|███████▌  | 3670/4850 [17:35<05:34,  3.52it/s]

Epoch: 2, Loss: 2.819802761077881


Processing epoch 01:  76%|███████▌  | 3671/4850 [17:35<05:32,  3.54it/s]

Epoch: 2, Loss: 2.747619867324829


Processing epoch 01:  76%|███████▌  | 3672/4850 [17:36<05:30,  3.56it/s]

Epoch: 2, Loss: 3.3983216285705566


Processing epoch 01:  76%|███████▌  | 3673/4850 [17:36<05:32,  3.54it/s]

Epoch: 2, Loss: 2.5508787631988525


Processing epoch 01:  76%|███████▌  | 3674/4850 [17:36<05:32,  3.54it/s]

Epoch: 2, Loss: 2.581538438796997


Processing epoch 01:  76%|███████▌  | 3675/4850 [17:36<05:32,  3.54it/s]

Epoch: 2, Loss: 2.707653045654297


Processing epoch 01:  76%|███████▌  | 3676/4850 [17:37<05:36,  3.49it/s]

Epoch: 2, Loss: 3.1214599609375


Processing epoch 01:  76%|███████▌  | 3677/4850 [17:37<05:38,  3.47it/s]

Epoch: 2, Loss: 2.294205665588379


Processing epoch 01:  76%|███████▌  | 3678/4850 [17:37<05:39,  3.45it/s]

Epoch: 2, Loss: 2.864813804626465


Processing epoch 01:  76%|███████▌  | 3679/4850 [17:38<05:36,  3.47it/s]

Epoch: 2, Loss: 2.41782283782959


Processing epoch 01:  76%|███████▌  | 3680/4850 [17:38<05:35,  3.49it/s]

Epoch: 2, Loss: 2.359189510345459


Processing epoch 01:  76%|███████▌  | 3681/4850 [17:38<05:34,  3.50it/s]

Epoch: 2, Loss: 2.384618043899536


Processing epoch 01:  76%|███████▌  | 3682/4850 [17:38<05:32,  3.51it/s]

Epoch: 2, Loss: 2.7367899417877197


Processing epoch 01:  76%|███████▌  | 3683/4850 [17:39<05:31,  3.52it/s]

Epoch: 2, Loss: 2.682976245880127


Processing epoch 01:  76%|███████▌  | 3684/4850 [17:39<05:32,  3.50it/s]

Epoch: 2, Loss: 2.4276459217071533


Processing epoch 01:  76%|███████▌  | 3685/4850 [17:39<05:36,  3.46it/s]

Epoch: 2, Loss: 2.943699359893799


Processing epoch 01:  76%|███████▌  | 3686/4850 [17:40<05:41,  3.41it/s]

Epoch: 2, Loss: 2.4245498180389404


Processing epoch 01:  76%|███████▌  | 3687/4850 [17:40<05:42,  3.40it/s]

Epoch: 2, Loss: 2.478348970413208


Processing epoch 01:  76%|███████▌  | 3688/4850 [17:40<05:44,  3.38it/s]

Epoch: 2, Loss: 2.6282200813293457


Processing epoch 01:  76%|███████▌  | 3689/4850 [17:40<05:44,  3.37it/s]

Epoch: 2, Loss: 3.0612435340881348


Processing epoch 01:  76%|███████▌  | 3690/4850 [17:41<05:39,  3.42it/s]

Epoch: 2, Loss: 2.3295645713806152


Processing epoch 01:  76%|███████▌  | 3691/4850 [17:41<05:34,  3.46it/s]

Epoch: 2, Loss: 3.839658260345459


Processing epoch 01:  76%|███████▌  | 3692/4850 [17:41<05:34,  3.46it/s]

Epoch: 2, Loss: 2.83640456199646


Processing epoch 01:  76%|███████▌  | 3693/4850 [17:42<05:44,  3.35it/s]

Epoch: 2, Loss: 2.3320069313049316


Processing epoch 01:  76%|███████▌  | 3694/4850 [17:42<05:40,  3.40it/s]

Epoch: 2, Loss: 2.87858247756958


Processing epoch 01:  76%|███████▌  | 3695/4850 [17:42<05:36,  3.43it/s]

Epoch: 2, Loss: 2.6673715114593506


Processing epoch 01:  76%|███████▌  | 3696/4850 [17:42<05:34,  3.45it/s]

Epoch: 2, Loss: 2.156905174255371


Processing epoch 01:  76%|███████▌  | 3697/4850 [17:43<05:33,  3.45it/s]

Epoch: 2, Loss: 2.425279378890991


Processing epoch 01:  76%|███████▌  | 3698/4850 [17:43<05:30,  3.48it/s]

Epoch: 2, Loss: 2.6684927940368652


Processing epoch 01:  76%|███████▋  | 3699/4850 [17:43<05:29,  3.49it/s]

Epoch: 2, Loss: 2.407839775085449


Processing epoch 01:  76%|███████▋  | 3700/4850 [17:44<05:31,  3.46it/s]

Epoch: 2, Loss: 2.327995538711548


Processing epoch 01:  76%|███████▋  | 3701/4850 [17:44<05:29,  3.48it/s]

Epoch: 2, Loss: 2.549459934234619


Processing epoch 01:  76%|███████▋  | 3702/4850 [17:44<05:28,  3.49it/s]

Epoch: 2, Loss: 2.384316921234131


Processing epoch 01:  76%|███████▋  | 3703/4850 [17:45<05:31,  3.46it/s]

Epoch: 2, Loss: 2.606125831604004


Processing epoch 01:  76%|███████▋  | 3704/4850 [17:45<05:30,  3.47it/s]

Epoch: 2, Loss: 2.592231273651123


Processing epoch 01:  76%|███████▋  | 3705/4850 [17:45<05:28,  3.48it/s]

Epoch: 2, Loss: 2.232140064239502


Processing epoch 01:  76%|███████▋  | 3706/4850 [17:45<05:29,  3.47it/s]

Epoch: 2, Loss: 2.2002077102661133


Processing epoch 01:  76%|███████▋  | 3707/4850 [17:46<05:31,  3.45it/s]

Epoch: 2, Loss: 2.8830153942108154


Processing epoch 01:  76%|███████▋  | 3708/4850 [17:46<05:30,  3.46it/s]

Epoch: 2, Loss: 1.8380944728851318


Processing epoch 01:  76%|███████▋  | 3709/4850 [17:46<05:28,  3.48it/s]

Epoch: 2, Loss: 3.0272488594055176


Processing epoch 01:  76%|███████▋  | 3710/4850 [17:47<05:27,  3.48it/s]

Epoch: 2, Loss: 2.8644261360168457


Processing epoch 01:  77%|███████▋  | 3711/4850 [17:47<05:26,  3.49it/s]

Epoch: 2, Loss: 2.528501033782959


Processing epoch 01:  77%|███████▋  | 3712/4850 [17:47<05:25,  3.50it/s]

Epoch: 2, Loss: 2.403233051300049


Processing epoch 01:  77%|███████▋  | 3713/4850 [17:47<05:24,  3.51it/s]

Epoch: 2, Loss: 2.894739866256714


Processing epoch 01:  77%|███████▋  | 3714/4850 [17:48<05:23,  3.52it/s]

Epoch: 2, Loss: 2.340658664703369


Processing epoch 01:  77%|███████▋  | 3715/4850 [17:48<05:22,  3.52it/s]

Epoch: 2, Loss: 2.395218849182129


Processing epoch 01:  77%|███████▋  | 3716/4850 [17:48<05:21,  3.53it/s]

Epoch: 2, Loss: 2.2343978881835938


Processing epoch 01:  77%|███████▋  | 3717/4850 [17:49<05:20,  3.54it/s]

Epoch: 2, Loss: 2.6647560596466064


Processing epoch 01:  77%|███████▋  | 3718/4850 [17:49<05:22,  3.51it/s]

Epoch: 2, Loss: 2.7228260040283203


Processing epoch 01:  77%|███████▋  | 3719/4850 [17:49<05:22,  3.51it/s]

Epoch: 2, Loss: 2.2884984016418457


Processing epoch 01:  77%|███████▋  | 3720/4850 [17:49<05:21,  3.52it/s]

Epoch: 2, Loss: 1.9405407905578613


Processing epoch 01:  77%|███████▋  | 3721/4850 [17:50<05:19,  3.54it/s]

Epoch: 2, Loss: 3.089810371398926


Processing epoch 01:  77%|███████▋  | 3722/4850 [17:50<05:19,  3.53it/s]

Epoch: 2, Loss: 2.094632148742676


Processing epoch 01:  77%|███████▋  | 3723/4850 [17:50<05:18,  3.54it/s]

Epoch: 2, Loss: 2.6878952980041504


Processing epoch 01:  77%|███████▋  | 3724/4850 [17:50<05:17,  3.54it/s]

Epoch: 2, Loss: 2.7171897888183594


Processing epoch 01:  77%|███████▋  | 3725/4850 [17:51<05:19,  3.52it/s]

Epoch: 2, Loss: 2.4858641624450684


Processing epoch 01:  77%|███████▋  | 3726/4850 [17:51<05:18,  3.53it/s]

Epoch: 2, Loss: 2.700847625732422


Processing epoch 01:  77%|███████▋  | 3727/4850 [17:51<05:17,  3.54it/s]

Epoch: 2, Loss: 2.7261481285095215


Processing epoch 01:  77%|███████▋  | 3728/4850 [17:52<05:18,  3.52it/s]

Epoch: 2, Loss: 2.222353219985962


Processing epoch 01:  77%|███████▋  | 3729/4850 [17:52<05:23,  3.47it/s]

Epoch: 2, Loss: 2.4707159996032715


Processing epoch 01:  77%|███████▋  | 3730/4850 [17:52<05:23,  3.46it/s]

Epoch: 2, Loss: 2.5420937538146973


Processing epoch 01:  77%|███████▋  | 3731/4850 [17:53<05:24,  3.44it/s]

Epoch: 2, Loss: 2.8090548515319824


Processing epoch 01:  77%|███████▋  | 3732/4850 [17:53<05:25,  3.43it/s]

Epoch: 2, Loss: 2.682495594024658


Processing epoch 01:  77%|███████▋  | 3733/4850 [17:53<05:27,  3.41it/s]

Epoch: 2, Loss: 2.389927387237549


Processing epoch 01:  77%|███████▋  | 3734/4850 [17:53<05:31,  3.36it/s]

Epoch: 2, Loss: 2.311314105987549


Processing epoch 01:  77%|███████▋  | 3735/4850 [17:54<05:31,  3.37it/s]

Epoch: 2, Loss: 2.6171140670776367


Processing epoch 01:  77%|███████▋  | 3736/4850 [17:54<05:35,  3.32it/s]

Epoch: 2, Loss: 2.54066801071167


Processing epoch 01:  77%|███████▋  | 3737/4850 [17:54<05:34,  3.32it/s]

Epoch: 2, Loss: 3.256743907928467


Processing epoch 01:  77%|███████▋  | 3738/4850 [17:55<05:28,  3.39it/s]

Epoch: 2, Loss: 2.40417742729187


Processing epoch 01:  77%|███████▋  | 3739/4850 [17:55<05:28,  3.38it/s]

Epoch: 2, Loss: 2.7255895137786865


Processing epoch 01:  77%|███████▋  | 3740/4850 [17:55<05:32,  3.34it/s]

Epoch: 2, Loss: 2.907989025115967


Processing epoch 01:  77%|███████▋  | 3741/4850 [17:56<05:33,  3.33it/s]

Epoch: 2, Loss: 2.439879894256592


Processing epoch 01:  77%|███████▋  | 3742/4850 [17:56<05:36,  3.29it/s]

Epoch: 2, Loss: 2.233036518096924


Processing epoch 01:  77%|███████▋  | 3743/4850 [17:56<05:34,  3.31it/s]

Epoch: 2, Loss: 3.0369057655334473


Processing epoch 01:  77%|███████▋  | 3744/4850 [17:56<05:36,  3.29it/s]

Epoch: 2, Loss: 2.4758994579315186


Processing epoch 01:  77%|███████▋  | 3745/4850 [17:57<05:30,  3.34it/s]

Epoch: 2, Loss: 2.409942626953125


Processing epoch 01:  77%|███████▋  | 3746/4850 [17:57<05:28,  3.36it/s]

Epoch: 2, Loss: 2.5880661010742188


Processing epoch 01:  77%|███████▋  | 3747/4850 [17:57<05:29,  3.34it/s]

Epoch: 2, Loss: 2.7903025150299072


Processing epoch 01:  77%|███████▋  | 3748/4850 [17:58<05:24,  3.39it/s]

Epoch: 2, Loss: 2.2733988761901855


Processing epoch 01:  77%|███████▋  | 3749/4850 [17:58<05:21,  3.42it/s]

Epoch: 2, Loss: 2.1865994930267334


Processing epoch 01:  77%|███████▋  | 3750/4850 [17:58<05:21,  3.42it/s]

Epoch: 2, Loss: 2.270024061203003


Processing epoch 01:  77%|███████▋  | 3751/4850 [17:58<05:18,  3.45it/s]

Epoch: 2, Loss: 2.0837037563323975


Processing epoch 01:  77%|███████▋  | 3752/4850 [17:59<05:15,  3.48it/s]

Epoch: 2, Loss: 2.5205979347229004


Processing epoch 01:  77%|███████▋  | 3753/4850 [17:59<05:14,  3.49it/s]

Epoch: 2, Loss: 2.3908438682556152


Processing epoch 01:  77%|███████▋  | 3754/4850 [17:59<05:13,  3.50it/s]

Epoch: 2, Loss: 2.153956174850464


Processing epoch 01:  77%|███████▋  | 3755/4850 [18:00<05:17,  3.45it/s]

Epoch: 2, Loss: 2.571826457977295


Processing epoch 01:  77%|███████▋  | 3756/4850 [18:00<05:21,  3.40it/s]

Epoch: 2, Loss: 2.9482860565185547


Processing epoch 01:  77%|███████▋  | 3757/4850 [18:00<05:25,  3.36it/s]

Epoch: 2, Loss: 2.5758185386657715


Processing epoch 01:  77%|███████▋  | 3758/4850 [18:01<05:26,  3.35it/s]

Epoch: 2, Loss: 2.7901129722595215


Processing epoch 01:  78%|███████▊  | 3759/4850 [18:01<05:27,  3.33it/s]

Epoch: 2, Loss: 2.264838218688965


Processing epoch 01:  78%|███████▊  | 3760/4850 [18:01<05:24,  3.36it/s]

Epoch: 2, Loss: 2.5509989261627197


Processing epoch 01:  78%|███████▊  | 3761/4850 [18:01<05:22,  3.38it/s]

Epoch: 2, Loss: 2.104743480682373


Processing epoch 01:  78%|███████▊  | 3762/4850 [18:02<05:20,  3.39it/s]

Epoch: 2, Loss: 3.004701614379883


Processing epoch 01:  78%|███████▊  | 3763/4850 [18:02<05:25,  3.34it/s]

Epoch: 2, Loss: 2.3217694759368896


Processing epoch 01:  78%|███████▊  | 3764/4850 [18:02<05:21,  3.38it/s]

Epoch: 2, Loss: 2.813502311706543


Processing epoch 01:  78%|███████▊  | 3765/4850 [18:03<05:20,  3.39it/s]

Epoch: 2, Loss: 2.2798547744750977


Processing epoch 01:  78%|███████▊  | 3766/4850 [18:03<05:18,  3.41it/s]

Epoch: 2, Loss: 2.427767515182495


Processing epoch 01:  78%|███████▊  | 3767/4850 [18:03<05:14,  3.45it/s]

Epoch: 2, Loss: 2.5278778076171875


Processing epoch 01:  78%|███████▊  | 3768/4850 [18:03<05:12,  3.47it/s]

Epoch: 2, Loss: 2.5172455310821533


Processing epoch 01:  78%|███████▊  | 3769/4850 [18:04<05:11,  3.47it/s]

Epoch: 2, Loss: 2.119368076324463


Processing epoch 01:  78%|███████▊  | 3770/4850 [18:04<05:09,  3.49it/s]

Epoch: 2, Loss: 2.3627610206604004


Processing epoch 01:  78%|███████▊  | 3771/4850 [18:04<05:07,  3.50it/s]

Epoch: 2, Loss: 2.6963157653808594


Processing epoch 01:  78%|███████▊  | 3772/4850 [18:05<05:06,  3.52it/s]

Epoch: 2, Loss: 2.847323179244995


Processing epoch 01:  78%|███████▊  | 3773/4850 [18:05<05:04,  3.53it/s]

Epoch: 2, Loss: 3.3560285568237305


Processing epoch 01:  78%|███████▊  | 3774/4850 [18:05<05:04,  3.53it/s]

Epoch: 2, Loss: 2.332260847091675


Processing epoch 01:  78%|███████▊  | 3775/4850 [18:05<05:07,  3.50it/s]

Epoch: 2, Loss: 2.330840826034546


Processing epoch 01:  78%|███████▊  | 3776/4850 [18:06<05:09,  3.47it/s]

Epoch: 2, Loss: 2.6930251121520996


Processing epoch 01:  78%|███████▊  | 3777/4850 [18:06<05:07,  3.49it/s]

Epoch: 2, Loss: 2.0919198989868164


Processing epoch 01:  78%|███████▊  | 3778/4850 [18:06<05:05,  3.50it/s]

Epoch: 2, Loss: 3.026810884475708


Processing epoch 01:  78%|███████▊  | 3779/4850 [18:07<05:06,  3.49it/s]

Epoch: 2, Loss: 3.0237984657287598


Processing epoch 01:  78%|███████▊  | 3780/4850 [18:07<05:09,  3.45it/s]

Epoch: 2, Loss: 2.4144129753112793


Processing epoch 01:  78%|███████▊  | 3781/4850 [18:07<05:13,  3.41it/s]

Epoch: 2, Loss: 2.6155734062194824


Processing epoch 01:  78%|███████▊  | 3782/4850 [18:07<05:09,  3.45it/s]

Epoch: 2, Loss: 2.1411070823669434


Processing epoch 01:  78%|███████▊  | 3783/4850 [18:08<05:12,  3.42it/s]

Epoch: 2, Loss: 3.0961575508117676


Processing epoch 01:  78%|███████▊  | 3784/4850 [18:08<05:11,  3.43it/s]

Epoch: 2, Loss: 2.1679317951202393


Processing epoch 01:  78%|███████▊  | 3785/4850 [18:08<05:06,  3.47it/s]

Epoch: 2, Loss: 2.5145277976989746


Processing epoch 01:  78%|███████▊  | 3786/4850 [18:09<05:10,  3.43it/s]

Epoch: 2, Loss: 2.331735134124756


Processing epoch 01:  78%|███████▊  | 3787/4850 [18:09<05:15,  3.37it/s]

Epoch: 2, Loss: 2.2346529960632324


Processing epoch 01:  78%|███████▊  | 3788/4850 [18:09<05:14,  3.38it/s]

Epoch: 2, Loss: 2.5566699504852295


Processing epoch 01:  78%|███████▊  | 3789/4850 [18:10<05:14,  3.37it/s]

Epoch: 2, Loss: 3.000168800354004


Processing epoch 01:  78%|███████▊  | 3790/4850 [18:10<05:20,  3.30it/s]

Epoch: 2, Loss: 2.3020098209381104


Processing epoch 01:  78%|███████▊  | 3791/4850 [18:10<05:17,  3.33it/s]

Epoch: 2, Loss: 2.417048454284668


Processing epoch 01:  78%|███████▊  | 3792/4850 [18:10<05:14,  3.36it/s]

Epoch: 2, Loss: 1.8466334342956543


Processing epoch 01:  78%|███████▊  | 3793/4850 [18:11<05:12,  3.38it/s]

Epoch: 2, Loss: 2.4933218955993652


Processing epoch 01:  78%|███████▊  | 3794/4850 [18:11<05:14,  3.35it/s]

Epoch: 2, Loss: 2.261385440826416


Processing epoch 01:  78%|███████▊  | 3795/4850 [18:11<05:11,  3.39it/s]

Epoch: 2, Loss: 2.4106147289276123


Processing epoch 01:  78%|███████▊  | 3796/4850 [18:12<05:07,  3.43it/s]

Epoch: 2, Loss: 2.4558117389678955


Processing epoch 01:  78%|███████▊  | 3797/4850 [18:12<05:05,  3.44it/s]

Epoch: 2, Loss: 1.9307177066802979


Processing epoch 01:  78%|███████▊  | 3798/4850 [18:12<05:04,  3.46it/s]

Epoch: 2, Loss: 2.4992098808288574


Processing epoch 01:  78%|███████▊  | 3799/4850 [18:12<05:04,  3.45it/s]

Epoch: 2, Loss: 2.2918288707733154


Processing epoch 01:  78%|███████▊  | 3800/4850 [18:13<05:03,  3.46it/s]

Epoch: 2, Loss: 2.7409310340881348


Processing epoch 01:  78%|███████▊  | 3801/4850 [18:13<05:03,  3.46it/s]

Epoch: 2, Loss: 2.0495738983154297


Processing epoch 01:  78%|███████▊  | 3802/4850 [18:13<05:01,  3.48it/s]

Epoch: 2, Loss: 2.301581382751465


Processing epoch 01:  78%|███████▊  | 3803/4850 [18:14<05:00,  3.49it/s]

Epoch: 2, Loss: 2.4722328186035156


Processing epoch 01:  78%|███████▊  | 3804/4850 [18:14<05:00,  3.48it/s]

Epoch: 2, Loss: 2.5067999362945557


Processing epoch 01:  78%|███████▊  | 3805/4850 [18:14<05:01,  3.46it/s]

Epoch: 2, Loss: 1.9720035791397095


Processing epoch 01:  78%|███████▊  | 3806/4850 [18:14<04:59,  3.48it/s]

Epoch: 2, Loss: 2.813951015472412


Processing epoch 01:  78%|███████▊  | 3807/4850 [18:15<04:56,  3.51it/s]

Epoch: 2, Loss: 3.026423215866089


Processing epoch 01:  79%|███████▊  | 3808/4850 [18:15<04:57,  3.50it/s]

Epoch: 2, Loss: 2.1528468132019043


Processing epoch 01:  79%|███████▊  | 3809/4850 [18:15<04:55,  3.52it/s]

Epoch: 2, Loss: 2.805802822113037


Processing epoch 01:  79%|███████▊  | 3810/4850 [18:16<04:56,  3.51it/s]

Epoch: 2, Loss: 2.087052345275879


Processing epoch 01:  79%|███████▊  | 3811/4850 [18:16<04:57,  3.49it/s]

Epoch: 2, Loss: 2.728914499282837


Processing epoch 01:  79%|███████▊  | 3812/4850 [18:16<04:59,  3.46it/s]

Epoch: 2, Loss: 2.632143259048462


Processing epoch 01:  79%|███████▊  | 3813/4850 [18:16<04:58,  3.47it/s]

Epoch: 2, Loss: 2.1966214179992676


Processing epoch 01:  79%|███████▊  | 3814/4850 [18:17<04:57,  3.48it/s]

Epoch: 2, Loss: 2.275820732116699


Processing epoch 01:  79%|███████▊  | 3815/4850 [18:17<04:55,  3.50it/s]

Epoch: 2, Loss: 2.9781975746154785


Processing epoch 01:  79%|███████▊  | 3816/4850 [18:17<04:55,  3.50it/s]

Epoch: 2, Loss: 3.1734375953674316


Processing epoch 01:  79%|███████▊  | 3817/4850 [18:18<04:53,  3.52it/s]

Epoch: 2, Loss: 2.8509507179260254


Processing epoch 01:  79%|███████▊  | 3818/4850 [18:18<04:54,  3.50it/s]

Epoch: 2, Loss: 2.521912097930908


Processing epoch 01:  79%|███████▊  | 3819/4850 [18:18<04:54,  3.51it/s]

Epoch: 2, Loss: 2.4939775466918945


Processing epoch 01:  79%|███████▉  | 3820/4850 [18:18<04:53,  3.51it/s]

Epoch: 2, Loss: 2.474720001220703


Processing epoch 01:  79%|███████▉  | 3821/4850 [18:19<04:53,  3.51it/s]

Epoch: 2, Loss: 2.2791543006896973


Processing epoch 01:  79%|███████▉  | 3822/4850 [18:19<04:53,  3.50it/s]

Epoch: 2, Loss: 2.6745171546936035


Processing epoch 01:  79%|███████▉  | 3823/4850 [18:19<04:54,  3.49it/s]

Epoch: 2, Loss: 2.4917330741882324


Processing epoch 01:  79%|███████▉  | 3824/4850 [18:20<04:54,  3.48it/s]

Epoch: 2, Loss: 2.173041343688965


Processing epoch 01:  79%|███████▉  | 3825/4850 [18:20<04:54,  3.48it/s]

Epoch: 2, Loss: 2.5405683517456055


Processing epoch 01:  79%|███████▉  | 3826/4850 [18:20<04:51,  3.51it/s]

Epoch: 2, Loss: 2.268012523651123


Processing epoch 01:  79%|███████▉  | 3827/4850 [18:20<04:51,  3.51it/s]

Epoch: 2, Loss: 2.529546022415161


Processing epoch 01:  79%|███████▉  | 3828/4850 [18:21<04:51,  3.51it/s]

Epoch: 2, Loss: 2.390613555908203


Processing epoch 01:  79%|███████▉  | 3829/4850 [18:21<04:50,  3.51it/s]

Epoch: 2, Loss: 2.4037516117095947


Processing epoch 01:  79%|███████▉  | 3830/4850 [18:21<04:54,  3.46it/s]

Epoch: 2, Loss: 2.7221193313598633


Processing epoch 01:  79%|███████▉  | 3831/4850 [18:22<04:57,  3.43it/s]

Epoch: 2, Loss: 2.6619865894317627


Processing epoch 01:  79%|███████▉  | 3832/4850 [18:22<05:00,  3.38it/s]

Epoch: 2, Loss: 2.2574729919433594


Processing epoch 01:  79%|███████▉  | 3833/4850 [18:22<05:05,  3.33it/s]

Epoch: 2, Loss: 2.4447314739227295


Processing epoch 01:  79%|███████▉  | 3834/4850 [18:23<04:59,  3.39it/s]

Epoch: 2, Loss: 2.4916603565216064


Processing epoch 01:  79%|███████▉  | 3835/4850 [18:23<04:57,  3.41it/s]

Epoch: 2, Loss: 2.5648932456970215


Processing epoch 01:  79%|███████▉  | 3836/4850 [18:23<04:52,  3.47it/s]

Epoch: 2, Loss: 4.221518039703369


Processing epoch 01:  79%|███████▉  | 3837/4850 [18:23<04:54,  3.43it/s]

Epoch: 2, Loss: 2.496257781982422


Processing epoch 01:  79%|███████▉  | 3838/4850 [18:24<04:52,  3.46it/s]

Epoch: 2, Loss: 2.4579131603240967


Processing epoch 01:  79%|███████▉  | 3839/4850 [18:24<04:54,  3.44it/s]

Epoch: 2, Loss: 2.6401519775390625


Processing epoch 01:  79%|███████▉  | 3840/4850 [18:24<04:58,  3.39it/s]

Epoch: 2, Loss: 2.9381532669067383


Processing epoch 01:  79%|███████▉  | 3841/4850 [18:25<05:00,  3.36it/s]

Epoch: 2, Loss: 2.5207996368408203


Processing epoch 01:  79%|███████▉  | 3842/4850 [18:25<04:58,  3.37it/s]

Epoch: 2, Loss: 2.5046913623809814


Processing epoch 01:  79%|███████▉  | 3843/4850 [18:25<04:57,  3.38it/s]

Epoch: 2, Loss: 2.480301856994629


Processing epoch 01:  79%|███████▉  | 3844/4850 [18:25<04:55,  3.40it/s]

Epoch: 2, Loss: 2.574801445007324


Processing epoch 01:  79%|███████▉  | 3845/4850 [18:26<04:53,  3.42it/s]

Epoch: 2, Loss: 1.939227819442749


Processing epoch 01:  79%|███████▉  | 3846/4850 [18:26<04:58,  3.36it/s]

Epoch: 2, Loss: 2.5449838638305664


Processing epoch 01:  79%|███████▉  | 3847/4850 [18:26<04:54,  3.41it/s]

Epoch: 2, Loss: 2.6678361892700195


Processing epoch 01:  79%|███████▉  | 3848/4850 [18:27<04:52,  3.42it/s]

Epoch: 2, Loss: 2.649413585662842


Processing epoch 01:  79%|███████▉  | 3849/4850 [18:27<04:50,  3.45it/s]

Epoch: 2, Loss: 2.544853687286377


Processing epoch 01:  79%|███████▉  | 3850/4850 [18:27<04:49,  3.45it/s]

Epoch: 2, Loss: 2.2016780376434326


Processing epoch 01:  79%|███████▉  | 3851/4850 [18:27<04:49,  3.45it/s]

Epoch: 2, Loss: 2.1329002380371094


Processing epoch 01:  79%|███████▉  | 3852/4850 [18:28<04:50,  3.43it/s]

Epoch: 2, Loss: 2.6323933601379395


Processing epoch 01:  79%|███████▉  | 3853/4850 [18:28<04:48,  3.45it/s]

Epoch: 2, Loss: 2.8399860858917236


Processing epoch 01:  79%|███████▉  | 3854/4850 [18:28<04:46,  3.48it/s]

Epoch: 2, Loss: 2.2040417194366455


Processing epoch 01:  79%|███████▉  | 3855/4850 [18:29<04:45,  3.49it/s]

Epoch: 2, Loss: 2.8994991779327393


Processing epoch 01:  80%|███████▉  | 3856/4850 [18:29<04:43,  3.51it/s]

Epoch: 2, Loss: 2.462851047515869


Processing epoch 01:  80%|███████▉  | 3857/4850 [18:29<04:43,  3.50it/s]

Epoch: 2, Loss: 2.1476495265960693


Processing epoch 01:  80%|███████▉  | 3858/4850 [18:29<04:41,  3.52it/s]

Epoch: 2, Loss: 2.4717226028442383


Processing epoch 01:  80%|███████▉  | 3859/4850 [18:30<04:42,  3.50it/s]

Epoch: 2, Loss: 2.6418662071228027


Processing epoch 01:  80%|███████▉  | 3860/4850 [18:30<04:42,  3.51it/s]

Epoch: 2, Loss: 2.3574700355529785


Processing epoch 01:  80%|███████▉  | 3861/4850 [18:30<04:41,  3.51it/s]

Epoch: 2, Loss: 2.5621519088745117


Processing epoch 01:  80%|███████▉  | 3862/4850 [18:31<04:41,  3.51it/s]

Epoch: 2, Loss: 2.281071901321411


Processing epoch 01:  80%|███████▉  | 3863/4850 [18:31<04:42,  3.50it/s]

Epoch: 2, Loss: 2.4558515548706055


Processing epoch 01:  80%|███████▉  | 3864/4850 [18:31<04:42,  3.49it/s]

Epoch: 2, Loss: 1.8494658470153809


Processing epoch 01:  80%|███████▉  | 3865/4850 [18:31<04:42,  3.49it/s]

Epoch: 2, Loss: 2.776538372039795


Processing epoch 01:  80%|███████▉  | 3866/4850 [18:32<04:40,  3.50it/s]

Epoch: 2, Loss: 2.167127847671509


Processing epoch 01:  80%|███████▉  | 3867/4850 [18:32<04:40,  3.51it/s]

Epoch: 2, Loss: 2.15694260597229


Processing epoch 01:  80%|███████▉  | 3868/4850 [18:32<04:39,  3.51it/s]

Epoch: 2, Loss: 2.443091869354248


Processing epoch 01:  80%|███████▉  | 3869/4850 [18:33<04:39,  3.51it/s]

Epoch: 2, Loss: 2.0393190383911133


Processing epoch 01:  80%|███████▉  | 3870/4850 [18:33<04:43,  3.46it/s]

Epoch: 2, Loss: 2.632669687271118


Processing epoch 01:  80%|███████▉  | 3871/4850 [18:33<04:41,  3.47it/s]

Epoch: 2, Loss: 2.302401065826416


Processing epoch 01:  80%|███████▉  | 3872/4850 [18:34<04:40,  3.49it/s]

Epoch: 2, Loss: 2.7318599224090576


Processing epoch 01:  80%|███████▉  | 3873/4850 [18:34<04:39,  3.50it/s]

Epoch: 2, Loss: 2.375309705734253


Processing epoch 01:  80%|███████▉  | 3874/4850 [18:34<04:37,  3.51it/s]

Epoch: 2, Loss: 2.604532241821289


Processing epoch 01:  80%|███████▉  | 3875/4850 [18:34<04:37,  3.51it/s]

Epoch: 2, Loss: 2.672938823699951


Processing epoch 01:  80%|███████▉  | 3876/4850 [18:35<04:37,  3.51it/s]

Epoch: 2, Loss: 2.6482319831848145


Processing epoch 01:  80%|███████▉  | 3877/4850 [18:35<04:36,  3.53it/s]

Epoch: 2, Loss: 2.3834762573242188


Processing epoch 01:  80%|███████▉  | 3878/4850 [18:35<04:36,  3.52it/s]

Epoch: 2, Loss: 2.5811257362365723


Processing epoch 01:  80%|███████▉  | 3879/4850 [18:35<04:36,  3.52it/s]

Epoch: 2, Loss: 2.3722972869873047


Processing epoch 01:  80%|████████  | 3880/4850 [18:36<04:35,  3.52it/s]

Epoch: 2, Loss: 2.3007097244262695


Processing epoch 01:  80%|████████  | 3881/4850 [18:36<04:37,  3.50it/s]

Epoch: 2, Loss: 2.3376922607421875


Processing epoch 01:  80%|████████  | 3882/4850 [18:36<04:40,  3.45it/s]

Epoch: 2, Loss: 2.8259220123291016


Processing epoch 01:  80%|████████  | 3883/4850 [18:37<04:42,  3.42it/s]

Epoch: 2, Loss: 2.8859877586364746


Processing epoch 01:  80%|████████  | 3884/4850 [18:37<04:46,  3.37it/s]

Epoch: 2, Loss: 2.390655755996704


Processing epoch 01:  80%|████████  | 3885/4850 [18:37<04:49,  3.33it/s]

Epoch: 2, Loss: 2.3999791145324707


Processing epoch 01:  80%|████████  | 3886/4850 [18:38<04:47,  3.35it/s]

Epoch: 2, Loss: 2.8435111045837402


Processing epoch 01:  80%|████████  | 3887/4850 [18:38<04:48,  3.34it/s]

Epoch: 2, Loss: 2.3337886333465576


Processing epoch 01:  80%|████████  | 3888/4850 [18:38<04:46,  3.36it/s]

Epoch: 2, Loss: 2.727299451828003


Processing epoch 01:  80%|████████  | 3889/4850 [18:38<04:45,  3.37it/s]

Epoch: 2, Loss: 2.294321060180664


Processing epoch 01:  80%|████████  | 3890/4850 [18:39<04:45,  3.36it/s]

Epoch: 2, Loss: 2.77209734916687


Processing epoch 01:  80%|████████  | 3891/4850 [18:39<04:46,  3.35it/s]

Epoch: 2, Loss: 2.247558116912842


Processing epoch 01:  80%|████████  | 3892/4850 [18:39<04:44,  3.36it/s]

Epoch: 2, Loss: 2.4465866088867188


Processing epoch 01:  80%|████████  | 3893/4850 [18:40<04:43,  3.38it/s]

Epoch: 2, Loss: 2.730747699737549


Processing epoch 01:  80%|████████  | 3894/4850 [18:40<04:39,  3.42it/s]

Epoch: 2, Loss: 2.62554931640625


Processing epoch 01:  80%|████████  | 3895/4850 [18:40<04:43,  3.37it/s]

Epoch: 2, Loss: 1.9528911113739014


Processing epoch 01:  80%|████████  | 3896/4850 [18:41<04:44,  3.36it/s]

Epoch: 2, Loss: 2.5249404907226562


Processing epoch 01:  80%|████████  | 3897/4850 [18:41<04:39,  3.41it/s]

Epoch: 2, Loss: 2.637054920196533


Processing epoch 01:  80%|████████  | 3898/4850 [18:41<04:37,  3.44it/s]

Epoch: 2, Loss: 2.143019676208496


Processing epoch 01:  80%|████████  | 3899/4850 [18:41<04:35,  3.45it/s]

Epoch: 2, Loss: 1.9330130815505981


Processing epoch 01:  80%|████████  | 3900/4850 [18:42<04:33,  3.47it/s]

Epoch: 2, Loss: 2.1524040699005127


Processing epoch 01:  80%|████████  | 3901/4850 [18:42<04:32,  3.48it/s]

Epoch: 2, Loss: 2.7347326278686523


Processing epoch 01:  80%|████████  | 3902/4850 [18:42<04:30,  3.50it/s]

Epoch: 2, Loss: 3.1138367652893066


Processing epoch 01:  80%|████████  | 3903/4850 [18:43<04:30,  3.50it/s]

Epoch: 2, Loss: 2.450546979904175


Processing epoch 01:  80%|████████  | 3904/4850 [18:43<04:28,  3.52it/s]

Epoch: 2, Loss: 2.4719440937042236


Processing epoch 01:  81%|████████  | 3905/4850 [18:43<04:30,  3.50it/s]

Epoch: 2, Loss: 2.1610472202301025


Processing epoch 01:  81%|████████  | 3906/4850 [18:43<04:29,  3.50it/s]

Epoch: 2, Loss: 2.0249836444854736


Processing epoch 01:  81%|████████  | 3907/4850 [18:44<04:27,  3.52it/s]

Epoch: 2, Loss: 4.08406925201416


Processing epoch 01:  81%|████████  | 3908/4850 [18:44<04:29,  3.50it/s]

Epoch: 2, Loss: 2.260620594024658


Processing epoch 01:  81%|████████  | 3909/4850 [18:44<04:28,  3.50it/s]

Epoch: 2, Loss: 2.5026376247406006


Processing epoch 01:  81%|████████  | 3910/4850 [18:45<04:27,  3.51it/s]

Epoch: 2, Loss: 2.2769124507904053


Processing epoch 01:  81%|████████  | 3911/4850 [18:45<04:27,  3.50it/s]

Epoch: 2, Loss: 2.350260019302368


Processing epoch 01:  81%|████████  | 3912/4850 [18:45<04:27,  3.51it/s]

Epoch: 2, Loss: 2.5908761024475098


Processing epoch 01:  81%|████████  | 3913/4850 [18:45<04:26,  3.52it/s]

Epoch: 2, Loss: 2.1071016788482666


Processing epoch 01:  81%|████████  | 3914/4850 [18:46<04:25,  3.53it/s]

Epoch: 2, Loss: 2.185990333557129


Processing epoch 01:  81%|████████  | 3915/4850 [18:46<04:25,  3.52it/s]

Epoch: 2, Loss: 2.7663941383361816


Processing epoch 01:  81%|████████  | 3916/4850 [18:46<04:27,  3.49it/s]

Epoch: 2, Loss: 2.7223153114318848


Processing epoch 01:  81%|████████  | 3917/4850 [18:47<04:29,  3.46it/s]

Epoch: 2, Loss: 2.313656806945801


Processing epoch 01:  81%|████████  | 3918/4850 [18:47<04:28,  3.47it/s]

Epoch: 2, Loss: 2.424647331237793


Processing epoch 01:  81%|████████  | 3919/4850 [18:47<04:27,  3.48it/s]

Epoch: 2, Loss: 2.1209540367126465


Processing epoch 01:  81%|████████  | 3920/4850 [18:47<04:25,  3.50it/s]

Epoch: 2, Loss: 2.028113842010498


Processing epoch 01:  81%|████████  | 3921/4850 [18:48<04:24,  3.52it/s]

Epoch: 2, Loss: 2.1704556941986084


Processing epoch 01:  81%|████████  | 3922/4850 [18:48<04:23,  3.52it/s]

Epoch: 2, Loss: 3.480447769165039


Processing epoch 01:  81%|████████  | 3923/4850 [18:48<04:23,  3.52it/s]

Epoch: 2, Loss: 3.2137465476989746


Processing epoch 01:  81%|████████  | 3924/4850 [18:49<04:22,  3.53it/s]

Epoch: 2, Loss: 2.1613614559173584


Processing epoch 01:  81%|████████  | 3925/4850 [18:49<04:23,  3.50it/s]

Epoch: 2, Loss: 2.700810194015503


Processing epoch 01:  81%|████████  | 3926/4850 [18:49<04:22,  3.52it/s]

Epoch: 2, Loss: 2.913160562515259


Processing epoch 01:  81%|████████  | 3927/4850 [18:49<04:22,  3.52it/s]

Epoch: 2, Loss: 2.4001107215881348


Processing epoch 01:  81%|████████  | 3928/4850 [18:50<04:21,  3.53it/s]

Epoch: 2, Loss: 2.5396175384521484


Processing epoch 01:  81%|████████  | 3929/4850 [18:50<04:20,  3.53it/s]

Epoch: 2, Loss: 2.2334513664245605


Processing epoch 01:  81%|████████  | 3930/4850 [18:50<04:20,  3.53it/s]

Epoch: 2, Loss: 2.793262481689453


Processing epoch 01:  81%|████████  | 3931/4850 [18:51<04:20,  3.52it/s]

Epoch: 2, Loss: 2.2408671379089355


Processing epoch 01:  81%|████████  | 3932/4850 [18:51<04:26,  3.45it/s]

Epoch: 2, Loss: 2.4362375736236572


Processing epoch 01:  81%|████████  | 3933/4850 [18:51<04:28,  3.41it/s]

Epoch: 2, Loss: 2.6763782501220703


Processing epoch 01:  81%|████████  | 3934/4850 [18:51<04:26,  3.44it/s]

Epoch: 2, Loss: 2.878241539001465


Processing epoch 01:  81%|████████  | 3935/4850 [18:52<04:25,  3.44it/s]

Epoch: 2, Loss: 2.4759130477905273


Processing epoch 01:  81%|████████  | 3936/4850 [18:52<04:24,  3.46it/s]

Epoch: 2, Loss: 2.1414451599121094


Processing epoch 01:  81%|████████  | 3937/4850 [18:52<04:21,  3.49it/s]

Epoch: 2, Loss: 2.5259714126586914


Processing epoch 01:  81%|████████  | 3938/4850 [18:53<04:20,  3.50it/s]

Epoch: 2, Loss: 2.7042267322540283


Processing epoch 01:  81%|████████  | 3939/4850 [18:53<04:22,  3.47it/s]

Epoch: 2, Loss: 2.424574375152588


Processing epoch 01:  81%|████████  | 3940/4850 [18:53<04:20,  3.50it/s]

Epoch: 2, Loss: 3.4952497482299805


Processing epoch 01:  81%|████████▏ | 3941/4850 [18:53<04:23,  3.44it/s]

Epoch: 2, Loss: 1.851076364517212


Processing epoch 01:  81%|████████▏ | 3942/4850 [18:54<04:24,  3.43it/s]

Epoch: 2, Loss: 2.5895400047302246


Processing epoch 01:  81%|████████▏ | 3943/4850 [18:54<04:25,  3.42it/s]

Epoch: 2, Loss: 2.383615255355835


Processing epoch 01:  81%|████████▏ | 3944/4850 [18:54<04:27,  3.38it/s]

Epoch: 2, Loss: 2.4316458702087402


Processing epoch 01:  81%|████████▏ | 3945/4850 [18:55<04:26,  3.39it/s]

Epoch: 2, Loss: 2.758077621459961


Processing epoch 01:  81%|████████▏ | 3946/4850 [18:55<04:31,  3.33it/s]

Epoch: 2, Loss: 2.441844940185547


Processing epoch 01:  81%|████████▏ | 3947/4850 [18:55<04:30,  3.34it/s]

Epoch: 2, Loss: 2.5808491706848145


Processing epoch 01:  81%|████████▏ | 3948/4850 [18:56<04:29,  3.34it/s]

Epoch: 2, Loss: 2.2197978496551514


Processing epoch 01:  81%|████████▏ | 3949/4850 [18:56<04:27,  3.37it/s]

Epoch: 2, Loss: 2.153977394104004


Processing epoch 01:  81%|████████▏ | 3950/4850 [18:56<04:24,  3.41it/s]

Epoch: 2, Loss: 2.400630474090576


Processing epoch 01:  81%|████████▏ | 3951/4850 [18:56<04:21,  3.44it/s]

Epoch: 2, Loss: 3.052804946899414


Processing epoch 01:  81%|████████▏ | 3952/4850 [18:57<04:22,  3.42it/s]

Epoch: 2, Loss: 2.69166898727417


Processing epoch 01:  82%|████████▏ | 3953/4850 [18:57<04:20,  3.44it/s]

Epoch: 2, Loss: 2.1319384574890137


Processing epoch 01:  82%|████████▏ | 3954/4850 [18:57<04:20,  3.44it/s]

Epoch: 2, Loss: 1.9113514423370361


Processing epoch 01:  82%|████████▏ | 3955/4850 [18:58<04:18,  3.47it/s]

Epoch: 2, Loss: 2.582190990447998


Processing epoch 01:  82%|████████▏ | 3956/4850 [18:58<04:16,  3.48it/s]

Epoch: 2, Loss: 3.132993698120117


Processing epoch 01:  82%|████████▏ | 3957/4850 [18:58<04:20,  3.43it/s]

Epoch: 2, Loss: 2.257310390472412


Processing epoch 01:  82%|████████▏ | 3958/4850 [18:58<04:17,  3.46it/s]

Epoch: 2, Loss: 2.6238648891448975


Processing epoch 01:  82%|████████▏ | 3959/4850 [18:59<04:18,  3.45it/s]

Epoch: 2, Loss: 2.3515729904174805


Processing epoch 01:  82%|████████▏ | 3960/4850 [18:59<04:16,  3.47it/s]

Epoch: 2, Loss: 2.4222331047058105


Processing epoch 01:  82%|████████▏ | 3961/4850 [18:59<04:16,  3.46it/s]

Epoch: 2, Loss: 3.1729748249053955


Processing epoch 01:  82%|████████▏ | 3962/4850 [19:00<04:14,  3.49it/s]

Epoch: 2, Loss: 2.4453043937683105


Processing epoch 01:  82%|████████▏ | 3963/4850 [19:00<04:13,  3.49it/s]

Epoch: 2, Loss: 2.459634780883789


Processing epoch 01:  82%|████████▏ | 3964/4850 [19:00<04:14,  3.48it/s]

Epoch: 2, Loss: 3.0033364295959473


Processing epoch 01:  82%|████████▏ | 3965/4850 [19:00<04:13,  3.49it/s]

Epoch: 2, Loss: 2.6387529373168945


Processing epoch 01:  82%|████████▏ | 3966/4850 [19:01<04:12,  3.50it/s]

Epoch: 2, Loss: 2.6119093894958496


Processing epoch 01:  82%|████████▏ | 3967/4850 [19:01<04:11,  3.51it/s]

Epoch: 2, Loss: 2.645463705062866


Processing epoch 01:  82%|████████▏ | 3968/4850 [19:01<04:13,  3.47it/s]

Epoch: 2, Loss: 2.386493682861328


Processing epoch 01:  82%|████████▏ | 3969/4850 [19:02<04:13,  3.48it/s]

Epoch: 2, Loss: 1.8615083694458008


Processing epoch 01:  82%|████████▏ | 3970/4850 [19:02<04:14,  3.46it/s]

Epoch: 2, Loss: 2.304504871368408


Processing epoch 01:  82%|████████▏ | 3971/4850 [19:02<04:12,  3.47it/s]

Epoch: 2, Loss: 2.17120099067688


Processing epoch 01:  82%|████████▏ | 3972/4850 [19:02<04:11,  3.49it/s]

Epoch: 2, Loss: 2.5820374488830566


Processing epoch 01:  82%|████████▏ | 3973/4850 [19:03<04:10,  3.50it/s]

Epoch: 2, Loss: 2.562455892562866


Processing epoch 01:  82%|████████▏ | 3974/4850 [19:03<04:11,  3.49it/s]

Epoch: 2, Loss: 2.412689447402954


Processing epoch 01:  82%|████████▏ | 3975/4850 [19:03<04:11,  3.49it/s]

Epoch: 2, Loss: 3.1900949478149414


Processing epoch 01:  82%|████████▏ | 3976/4850 [19:04<04:10,  3.49it/s]

Epoch: 2, Loss: 2.5419957637786865


Processing epoch 01:  82%|████████▏ | 3977/4850 [19:04<04:09,  3.50it/s]

Epoch: 2, Loss: 2.6142826080322266


Processing epoch 01:  82%|████████▏ | 3978/4850 [19:04<04:09,  3.50it/s]

Epoch: 2, Loss: 2.3875718116760254


Processing epoch 01:  82%|████████▏ | 3979/4850 [19:04<04:07,  3.51it/s]

Epoch: 2, Loss: 2.722086191177368


Processing epoch 01:  82%|████████▏ | 3980/4850 [19:05<04:08,  3.50it/s]

Epoch: 2, Loss: 2.5579307079315186


Processing epoch 01:  82%|████████▏ | 3981/4850 [19:05<04:07,  3.51it/s]

Epoch: 2, Loss: 2.3114123344421387


Processing epoch 01:  82%|████████▏ | 3982/4850 [19:05<04:05,  3.53it/s]

Epoch: 2, Loss: 2.8590450286865234


Processing epoch 01:  82%|████████▏ | 3983/4850 [19:06<04:06,  3.51it/s]

Epoch: 2, Loss: 2.5097146034240723


Processing epoch 01:  82%|████████▏ | 3984/4850 [19:06<04:11,  3.44it/s]

Epoch: 2, Loss: 2.7807259559631348


Processing epoch 01:  82%|████████▏ | 3985/4850 [19:06<04:09,  3.46it/s]

Epoch: 2, Loss: 2.3046388626098633


Processing epoch 01:  82%|████████▏ | 3986/4850 [19:06<04:13,  3.41it/s]

Epoch: 2, Loss: 2.7145590782165527


Processing epoch 01:  82%|████████▏ | 3987/4850 [19:07<04:10,  3.45it/s]

Epoch: 2, Loss: 2.3119025230407715


Processing epoch 01:  82%|████████▏ | 3988/4850 [19:07<04:07,  3.48it/s]

Epoch: 2, Loss: 2.5347485542297363


Processing epoch 01:  82%|████████▏ | 3989/4850 [19:07<04:05,  3.50it/s]

Epoch: 2, Loss: 2.4982213973999023


Processing epoch 01:  82%|████████▏ | 3990/4850 [19:08<04:05,  3.50it/s]

Epoch: 2, Loss: 2.4269986152648926


Processing epoch 01:  82%|████████▏ | 3991/4850 [19:08<04:05,  3.50it/s]

Epoch: 2, Loss: 2.424808979034424


Processing epoch 01:  82%|████████▏ | 3992/4850 [19:08<04:10,  3.42it/s]

Epoch: 2, Loss: 2.196962356567383


Processing epoch 01:  82%|████████▏ | 3993/4850 [19:08<04:13,  3.37it/s]

Epoch: 2, Loss: 3.209238052368164


Processing epoch 01:  82%|████████▏ | 3994/4850 [19:09<04:18,  3.32it/s]

Epoch: 2, Loss: 2.210543155670166


Processing epoch 01:  82%|████████▏ | 3995/4850 [19:09<04:17,  3.32it/s]

Epoch: 2, Loss: 2.7651681900024414


Processing epoch 01:  82%|████████▏ | 3996/4850 [19:09<04:13,  3.36it/s]

Epoch: 2, Loss: 3.833909749984741


Processing epoch 01:  82%|████████▏ | 3997/4850 [19:10<04:12,  3.38it/s]

Epoch: 2, Loss: 2.190995216369629


Processing epoch 01:  82%|████████▏ | 3998/4850 [19:10<04:16,  3.32it/s]

Epoch: 2, Loss: 3.2028493881225586


Processing epoch 01:  82%|████████▏ | 3999/4850 [19:10<04:17,  3.31it/s]

Epoch: 2, Loss: 2.274224281311035


Processing epoch 01:  82%|████████▏ | 4000/4850 [19:11<04:13,  3.35it/s]

Epoch: 2, Loss: 2.532071828842163


Processing epoch 01:  82%|████████▏ | 4001/4850 [19:11<04:09,  3.40it/s]

Epoch: 2, Loss: 2.476764678955078


Processing epoch 01:  83%|████████▎ | 4002/4850 [19:11<04:08,  3.41it/s]

Epoch: 2, Loss: 1.9760453701019287


Processing epoch 01:  83%|████████▎ | 4003/4850 [19:11<04:07,  3.43it/s]

Epoch: 2, Loss: 2.4930245876312256


Processing epoch 01:  83%|████████▎ | 4004/4850 [19:12<04:06,  3.43it/s]

Epoch: 2, Loss: 2.115252733230591


Processing epoch 01:  83%|████████▎ | 4005/4850 [19:12<04:05,  3.44it/s]

Epoch: 2, Loss: 2.8751683235168457


Processing epoch 01:  83%|████████▎ | 4006/4850 [19:12<04:03,  3.46it/s]

Epoch: 2, Loss: 2.5625030994415283


Processing epoch 01:  83%|████████▎ | 4007/4850 [19:13<04:02,  3.47it/s]

Epoch: 2, Loss: 2.6109910011291504


Processing epoch 01:  83%|████████▎ | 4008/4850 [19:13<04:01,  3.48it/s]

Epoch: 2, Loss: 2.6817264556884766


Processing epoch 01:  83%|████████▎ | 4009/4850 [19:13<04:00,  3.50it/s]

Epoch: 2, Loss: 2.4863038063049316


Processing epoch 01:  83%|████████▎ | 4010/4850 [19:13<04:00,  3.50it/s]

Epoch: 2, Loss: 2.5970025062561035


Processing epoch 01:  83%|████████▎ | 4011/4850 [19:14<04:02,  3.47it/s]

Epoch: 2, Loss: 2.9408042430877686


Processing epoch 01:  83%|████████▎ | 4012/4850 [19:14<04:00,  3.48it/s]

Epoch: 2, Loss: 2.36330509185791


Processing epoch 01:  83%|████████▎ | 4013/4850 [19:14<04:00,  3.47it/s]

Epoch: 2, Loss: 1.9561870098114014


Processing epoch 01:  83%|████████▎ | 4014/4850 [19:15<03:59,  3.49it/s]

Epoch: 2, Loss: 2.825979709625244


Processing epoch 01:  83%|████████▎ | 4015/4850 [19:15<03:58,  3.50it/s]

Epoch: 2, Loss: 2.196626663208008


Processing epoch 01:  83%|████████▎ | 4016/4850 [19:15<04:02,  3.44it/s]

Epoch: 2, Loss: 2.5657131671905518


Processing epoch 01:  83%|████████▎ | 4017/4850 [19:15<03:59,  3.48it/s]

Epoch: 2, Loss: 2.1663429737091064


Processing epoch 01:  83%|████████▎ | 4018/4850 [19:16<03:57,  3.50it/s]

Epoch: 2, Loss: 2.2318859100341797


Processing epoch 01:  83%|████████▎ | 4019/4850 [19:16<03:56,  3.52it/s]

Epoch: 2, Loss: 2.419952392578125


Processing epoch 01:  83%|████████▎ | 4020/4850 [19:16<03:56,  3.50it/s]

Epoch: 2, Loss: 2.5965518951416016


Processing epoch 01:  83%|████████▎ | 4021/4850 [19:17<03:56,  3.51it/s]

Epoch: 2, Loss: 2.856865882873535


Processing epoch 01:  83%|████████▎ | 4022/4850 [19:17<03:58,  3.47it/s]

Epoch: 2, Loss: 2.645305871963501


Processing epoch 01:  83%|████████▎ | 4023/4850 [19:17<03:58,  3.46it/s]

Epoch: 2, Loss: 2.7113027572631836


Processing epoch 01:  83%|████████▎ | 4024/4850 [19:17<03:56,  3.49it/s]

Epoch: 2, Loss: 2.226409912109375


Processing epoch 01:  83%|████████▎ | 4025/4850 [19:18<03:56,  3.49it/s]

Epoch: 2, Loss: 2.31418514251709


Processing epoch 01:  83%|████████▎ | 4026/4850 [19:18<03:56,  3.49it/s]

Epoch: 2, Loss: 2.6995320320129395


Processing epoch 01:  83%|████████▎ | 4027/4850 [19:18<03:55,  3.50it/s]

Epoch: 2, Loss: 2.426830768585205


Processing epoch 01:  83%|████████▎ | 4028/4850 [19:19<03:54,  3.51it/s]

Epoch: 2, Loss: 2.4534716606140137


Processing epoch 01:  83%|████████▎ | 4029/4850 [19:19<03:53,  3.51it/s]

Epoch: 2, Loss: 2.4534528255462646


Processing epoch 01:  83%|████████▎ | 4030/4850 [19:19<03:53,  3.51it/s]

Epoch: 2, Loss: 3.2071309089660645


Processing epoch 01:  83%|████████▎ | 4031/4850 [19:19<03:52,  3.53it/s]

Epoch: 2, Loss: 2.7560372352600098


Processing epoch 01:  83%|████████▎ | 4032/4850 [19:20<03:52,  3.52it/s]

Epoch: 2, Loss: 2.3003149032592773


Processing epoch 01:  83%|████████▎ | 4033/4850 [19:20<03:55,  3.47it/s]

Epoch: 2, Loss: 2.8840765953063965


Processing epoch 01:  83%|████████▎ | 4034/4850 [19:20<03:52,  3.50it/s]

Epoch: 2, Loss: 2.481327772140503


Processing epoch 01:  83%|████████▎ | 4035/4850 [19:21<03:54,  3.48it/s]

Epoch: 2, Loss: 2.2743444442749023


Processing epoch 01:  83%|████████▎ | 4036/4850 [19:21<03:54,  3.47it/s]

Epoch: 2, Loss: 2.4785594940185547


Processing epoch 01:  83%|████████▎ | 4037/4850 [19:21<03:58,  3.42it/s]

Epoch: 2, Loss: 2.083059787750244


Processing epoch 01:  83%|████████▎ | 4038/4850 [19:21<04:01,  3.37it/s]

Epoch: 2, Loss: 2.6152684688568115


Processing epoch 01:  83%|████████▎ | 4039/4850 [19:22<04:01,  3.36it/s]

Epoch: 2, Loss: 2.3419859409332275


Processing epoch 01:  83%|████████▎ | 4040/4850 [19:22<04:01,  3.35it/s]

Epoch: 2, Loss: 2.5360560417175293


Processing epoch 01:  83%|████████▎ | 4041/4850 [19:22<03:59,  3.38it/s]

Epoch: 2, Loss: 2.7949814796447754


Processing epoch 01:  83%|████████▎ | 4042/4850 [19:23<04:00,  3.35it/s]

Epoch: 2, Loss: 2.6991991996765137


Processing epoch 01:  83%|████████▎ | 4043/4850 [19:23<04:00,  3.36it/s]

Epoch: 2, Loss: 2.640573263168335


Processing epoch 01:  83%|████████▎ | 4044/4850 [19:23<04:02,  3.33it/s]

Epoch: 2, Loss: 2.4424638748168945


Processing epoch 01:  83%|████████▎ | 4045/4850 [19:24<04:01,  3.34it/s]

Epoch: 2, Loss: 2.237483263015747


Processing epoch 01:  83%|████████▎ | 4046/4850 [19:24<03:59,  3.35it/s]

Epoch: 2, Loss: 2.4989418983459473


Processing epoch 01:  83%|████████▎ | 4047/4850 [19:24<03:59,  3.36it/s]

Epoch: 2, Loss: 2.647594928741455


Processing epoch 01:  83%|████████▎ | 4048/4850 [19:24<03:56,  3.39it/s]

Epoch: 2, Loss: 2.4266111850738525


Processing epoch 01:  83%|████████▎ | 4049/4850 [19:25<03:58,  3.37it/s]

Epoch: 2, Loss: 2.409623622894287


Processing epoch 01:  84%|████████▎ | 4050/4850 [19:25<04:00,  3.33it/s]

Epoch: 2, Loss: 2.4903392791748047


Processing epoch 01:  84%|████████▎ | 4051/4850 [19:25<03:57,  3.36it/s]

Epoch: 2, Loss: 2.7194113731384277


Processing epoch 01:  84%|████████▎ | 4052/4850 [19:26<03:53,  3.41it/s]

Epoch: 2, Loss: 2.976663589477539


Processing epoch 01:  84%|████████▎ | 4053/4850 [19:26<03:51,  3.45it/s]

Epoch: 2, Loss: 2.1305055618286133


Processing epoch 01:  84%|████████▎ | 4054/4850 [19:26<03:50,  3.45it/s]

Epoch: 2, Loss: 2.212696075439453


Processing epoch 01:  84%|████████▎ | 4055/4850 [19:27<03:49,  3.47it/s]

Epoch: 2, Loss: 2.4313788414001465


Processing epoch 01:  84%|████████▎ | 4056/4850 [19:27<03:48,  3.48it/s]

Epoch: 2, Loss: 2.549471378326416


Processing epoch 01:  84%|████████▎ | 4057/4850 [19:27<03:48,  3.47it/s]

Epoch: 2, Loss: 2.3179240226745605


Processing epoch 01:  84%|████████▎ | 4058/4850 [19:27<03:47,  3.48it/s]

Epoch: 2, Loss: 2.4215903282165527


Processing epoch 01:  84%|████████▎ | 4059/4850 [19:28<03:46,  3.50it/s]

Epoch: 2, Loss: 2.1325387954711914


Processing epoch 01:  84%|████████▎ | 4060/4850 [19:28<03:46,  3.48it/s]

Epoch: 2, Loss: 2.360795497894287


Processing epoch 01:  84%|████████▎ | 4061/4850 [19:28<03:45,  3.50it/s]

Epoch: 2, Loss: 2.3375000953674316


Processing epoch 01:  84%|████████▍ | 4062/4850 [19:29<03:48,  3.45it/s]

Epoch: 2, Loss: 2.9623026847839355


Processing epoch 01:  84%|████████▍ | 4063/4850 [19:29<03:46,  3.47it/s]

Epoch: 2, Loss: 2.4105043411254883


Processing epoch 01:  84%|████████▍ | 4064/4850 [19:29<03:45,  3.49it/s]

Epoch: 2, Loss: 2.1234519481658936


Processing epoch 01:  84%|████████▍ | 4065/4850 [19:29<03:44,  3.50it/s]

Epoch: 2, Loss: 2.224743604660034


Processing epoch 01:  84%|████████▍ | 4066/4850 [19:30<03:43,  3.50it/s]

Epoch: 2, Loss: 2.4040396213531494


Processing epoch 01:  84%|████████▍ | 4067/4850 [19:30<03:43,  3.50it/s]

Epoch: 2, Loss: 2.589062213897705


Processing epoch 01:  84%|████████▍ | 4068/4850 [19:30<03:44,  3.48it/s]

Epoch: 2, Loss: 2.5026721954345703


Processing epoch 01:  84%|████████▍ | 4069/4850 [19:31<03:42,  3.52it/s]

Epoch: 2, Loss: 2.9260473251342773


Processing epoch 01:  84%|████████▍ | 4070/4850 [19:31<03:41,  3.51it/s]

Epoch: 2, Loss: 3.2520508766174316


Processing epoch 01:  84%|████████▍ | 4071/4850 [19:31<03:41,  3.51it/s]

Epoch: 2, Loss: 2.786562442779541


Processing epoch 01:  84%|████████▍ | 4072/4850 [19:31<03:40,  3.53it/s]

Epoch: 2, Loss: 2.4991016387939453


Processing epoch 01:  84%|████████▍ | 4073/4850 [19:32<03:40,  3.52it/s]

Epoch: 2, Loss: 2.9511325359344482


Processing epoch 01:  84%|████████▍ | 4074/4850 [19:32<03:40,  3.52it/s]

Epoch: 2, Loss: 2.501934051513672


Processing epoch 01:  84%|████████▍ | 4075/4850 [19:32<03:41,  3.50it/s]

Epoch: 2, Loss: 2.3956894874572754


Processing epoch 01:  84%|████████▍ | 4076/4850 [19:33<03:41,  3.50it/s]

Epoch: 2, Loss: 2.154493808746338


Processing epoch 01:  84%|████████▍ | 4077/4850 [19:33<03:42,  3.48it/s]

Epoch: 2, Loss: 2.6022181510925293


Processing epoch 01:  84%|████████▍ | 4078/4850 [19:33<03:41,  3.49it/s]

Epoch: 2, Loss: 2.0654044151306152


Processing epoch 01:  84%|████████▍ | 4079/4850 [19:33<03:40,  3.50it/s]

Epoch: 2, Loss: 2.216777801513672


Processing epoch 01:  84%|████████▍ | 4080/4850 [19:34<03:39,  3.51it/s]

Epoch: 2, Loss: 1.9253143072128296


Processing epoch 01:  84%|████████▍ | 4081/4850 [19:34<03:40,  3.49it/s]

Epoch: 2, Loss: 2.5242819786071777


Processing epoch 01:  84%|████████▍ | 4082/4850 [19:34<03:39,  3.50it/s]

Epoch: 2, Loss: 2.4883227348327637


Processing epoch 01:  84%|████████▍ | 4083/4850 [19:35<03:38,  3.51it/s]

Epoch: 2, Loss: 2.11395001411438


Processing epoch 01:  84%|████████▍ | 4084/4850 [19:35<03:40,  3.47it/s]

Epoch: 2, Loss: 2.4985573291778564


Processing epoch 01:  84%|████████▍ | 4085/4850 [19:35<03:40,  3.46it/s]

Epoch: 2, Loss: 2.3099260330200195


Processing epoch 01:  84%|████████▍ | 4086/4850 [19:35<03:42,  3.43it/s]

Epoch: 2, Loss: 2.791886806488037


Processing epoch 01:  84%|████████▍ | 4087/4850 [19:36<03:41,  3.44it/s]

Epoch: 2, Loss: 2.647660732269287


Processing epoch 01:  84%|████████▍ | 4088/4850 [19:36<03:44,  3.40it/s]

Epoch: 2, Loss: 2.484281539916992


Processing epoch 01:  84%|████████▍ | 4089/4850 [19:36<03:40,  3.45it/s]

Epoch: 2, Loss: 2.8434462547302246


Processing epoch 01:  84%|████████▍ | 4090/4850 [19:37<03:40,  3.45it/s]

Epoch: 2, Loss: 2.601048469543457


Processing epoch 01:  84%|████████▍ | 4091/4850 [19:37<03:39,  3.46it/s]

Epoch: 2, Loss: 2.254730701446533


Processing epoch 01:  84%|████████▍ | 4092/4850 [19:37<03:41,  3.43it/s]

Epoch: 2, Loss: 2.5422003269195557


Processing epoch 01:  84%|████████▍ | 4093/4850 [19:37<03:44,  3.38it/s]

Epoch: 2, Loss: 2.361752986907959


Processing epoch 01:  84%|████████▍ | 4094/4850 [19:38<03:46,  3.34it/s]

Epoch: 2, Loss: 2.3902363777160645


Processing epoch 01:  84%|████████▍ | 4095/4850 [19:38<03:45,  3.34it/s]

Epoch: 2, Loss: 2.8046579360961914


Processing epoch 01:  84%|████████▍ | 4096/4850 [19:38<03:45,  3.34it/s]

Epoch: 2, Loss: 2.451714038848877


Processing epoch 01:  84%|████████▍ | 4097/4850 [19:39<03:43,  3.37it/s]

Epoch: 2, Loss: 2.3060646057128906


Processing epoch 01:  84%|████████▍ | 4098/4850 [19:39<03:41,  3.40it/s]

Epoch: 2, Loss: 2.689849615097046


Processing epoch 01:  85%|████████▍ | 4099/4850 [19:39<03:40,  3.40it/s]

Epoch: 2, Loss: 2.8000292778015137


Processing epoch 01:  85%|████████▍ | 4100/4850 [19:40<03:40,  3.40it/s]

Epoch: 2, Loss: 2.997718334197998


Processing epoch 01:  85%|████████▍ | 4101/4850 [19:40<03:40,  3.40it/s]

Epoch: 2, Loss: 2.399258613586426


Processing epoch 01:  85%|████████▍ | 4102/4850 [19:40<03:42,  3.36it/s]

Epoch: 2, Loss: 1.6022381782531738


Processing epoch 01:  85%|████████▍ | 4103/4850 [19:40<03:40,  3.39it/s]

Epoch: 2, Loss: 1.8925892114639282


Processing epoch 01:  85%|████████▍ | 4104/4850 [19:41<03:37,  3.44it/s]

Epoch: 2, Loss: 2.8253684043884277


Processing epoch 01:  85%|████████▍ | 4105/4850 [19:41<03:35,  3.45it/s]

Epoch: 2, Loss: 2.6175966262817383


Processing epoch 01:  85%|████████▍ | 4106/4850 [19:41<03:36,  3.44it/s]

Epoch: 2, Loss: 2.837982177734375


Processing epoch 01:  85%|████████▍ | 4107/4850 [19:42<03:34,  3.47it/s]

Epoch: 2, Loss: 2.6463875770568848


Processing epoch 01:  85%|████████▍ | 4108/4850 [19:42<03:33,  3.48it/s]

Epoch: 2, Loss: 2.3176255226135254


Processing epoch 01:  85%|████████▍ | 4109/4850 [19:42<03:31,  3.50it/s]

Epoch: 2, Loss: 2.214634895324707


Processing epoch 01:  85%|████████▍ | 4110/4850 [19:42<03:31,  3.51it/s]

Epoch: 2, Loss: 2.257108688354492


Processing epoch 01:  85%|████████▍ | 4111/4850 [19:43<03:30,  3.51it/s]

Epoch: 2, Loss: 2.480714797973633


Processing epoch 01:  85%|████████▍ | 4112/4850 [19:43<03:30,  3.50it/s]

Epoch: 2, Loss: 2.485076427459717


Processing epoch 01:  85%|████████▍ | 4113/4850 [19:43<03:29,  3.51it/s]

Epoch: 2, Loss: 2.4871091842651367


Processing epoch 01:  85%|████████▍ | 4114/4850 [19:44<03:28,  3.53it/s]

Epoch: 2, Loss: 2.8262901306152344


Processing epoch 01:  85%|████████▍ | 4115/4850 [19:44<03:27,  3.53it/s]

Epoch: 2, Loss: 2.746797561645508


Processing epoch 01:  85%|████████▍ | 4116/4850 [19:44<03:28,  3.52it/s]

Epoch: 2, Loss: 1.9396758079528809


Processing epoch 01:  85%|████████▍ | 4117/4850 [19:44<03:30,  3.49it/s]

Epoch: 2, Loss: 2.143188238143921


Processing epoch 01:  85%|████████▍ | 4118/4850 [19:45<03:30,  3.48it/s]

Epoch: 2, Loss: 2.7158937454223633


Processing epoch 01:  85%|████████▍ | 4119/4850 [19:45<03:30,  3.48it/s]

Epoch: 2, Loss: 2.5348362922668457


Processing epoch 01:  85%|████████▍ | 4120/4850 [19:45<03:29,  3.49it/s]

Epoch: 2, Loss: 2.6563339233398438


Processing epoch 01:  85%|████████▍ | 4121/4850 [19:46<03:28,  3.50it/s]

Epoch: 2, Loss: 2.609595775604248


Processing epoch 01:  85%|████████▍ | 4122/4850 [19:46<03:27,  3.51it/s]

Epoch: 2, Loss: 2.1195735931396484


Processing epoch 01:  85%|████████▌ | 4123/4850 [19:46<03:26,  3.51it/s]

Epoch: 2, Loss: 2.350567579269409


Processing epoch 01:  85%|████████▌ | 4124/4850 [19:46<03:28,  3.48it/s]

Epoch: 2, Loss: 2.4411184787750244


Processing epoch 01:  85%|████████▌ | 4125/4850 [19:47<03:27,  3.49it/s]

Epoch: 2, Loss: 2.778674602508545


Processing epoch 01:  85%|████████▌ | 4126/4850 [19:47<03:26,  3.50it/s]

Epoch: 2, Loss: 3.3370919227600098


Processing epoch 01:  85%|████████▌ | 4127/4850 [19:47<03:25,  3.51it/s]

Epoch: 2, Loss: 2.6190600395202637


Processing epoch 01:  85%|████████▌ | 4128/4850 [19:48<03:27,  3.48it/s]

Epoch: 2, Loss: 2.6222033500671387


Processing epoch 01:  85%|████████▌ | 4129/4850 [19:48<03:26,  3.49it/s]

Epoch: 2, Loss: 2.6405296325683594


Processing epoch 01:  85%|████████▌ | 4130/4850 [19:48<03:24,  3.51it/s]

Epoch: 2, Loss: 2.4042539596557617


Processing epoch 01:  85%|████████▌ | 4131/4850 [19:48<03:24,  3.52it/s]

Epoch: 2, Loss: 2.7230772972106934


Processing epoch 01:  85%|████████▌ | 4132/4850 [19:49<03:23,  3.52it/s]

Epoch: 2, Loss: 2.038728713989258


Processing epoch 01:  85%|████████▌ | 4133/4850 [19:49<03:25,  3.50it/s]

Epoch: 2, Loss: 2.0145325660705566


Processing epoch 01:  85%|████████▌ | 4134/4850 [19:49<03:24,  3.51it/s]

Epoch: 2, Loss: 2.5092225074768066


Processing epoch 01:  85%|████████▌ | 4135/4850 [19:50<03:23,  3.51it/s]

Epoch: 2, Loss: 2.526942253112793


Processing epoch 01:  85%|████████▌ | 4136/4850 [19:50<03:23,  3.51it/s]

Epoch: 2, Loss: 2.392904281616211


Processing epoch 01:  85%|████████▌ | 4137/4850 [19:50<03:22,  3.53it/s]

Epoch: 2, Loss: 2.1314098834991455


Processing epoch 01:  85%|████████▌ | 4138/4850 [19:50<03:24,  3.49it/s]

Epoch: 2, Loss: 2.4373779296875


Processing epoch 01:  85%|████████▌ | 4139/4850 [19:51<03:25,  3.46it/s]

Epoch: 2, Loss: 2.6045706272125244


Processing epoch 01:  85%|████████▌ | 4140/4850 [19:51<03:23,  3.48it/s]

Epoch: 2, Loss: 2.2022082805633545


Processing epoch 01:  85%|████████▌ | 4141/4850 [19:51<03:24,  3.48it/s]

Epoch: 2, Loss: 2.7956900596618652


Processing epoch 01:  85%|████████▌ | 4142/4850 [19:52<03:23,  3.48it/s]

Epoch: 2, Loss: 2.28385329246521


Processing epoch 01:  85%|████████▌ | 4143/4850 [19:52<03:21,  3.50it/s]

Epoch: 2, Loss: 2.611936569213867


Processing epoch 01:  85%|████████▌ | 4144/4850 [19:52<03:20,  3.52it/s]

Epoch: 2, Loss: 2.4648146629333496


Processing epoch 01:  85%|████████▌ | 4145/4850 [19:52<03:20,  3.52it/s]

Epoch: 2, Loss: 2.689981698989868


Processing epoch 01:  85%|████████▌ | 4146/4850 [19:53<03:21,  3.49it/s]

Epoch: 2, Loss: 2.34647536277771


Processing epoch 01:  86%|████████▌ | 4147/4850 [19:53<03:25,  3.42it/s]

Epoch: 2, Loss: 2.3663036823272705


Processing epoch 01:  86%|████████▌ | 4148/4850 [19:53<03:26,  3.40it/s]

Epoch: 2, Loss: 2.5863587856292725


Processing epoch 01:  86%|████████▌ | 4149/4850 [19:54<03:29,  3.34it/s]

Epoch: 2, Loss: 2.2947545051574707


Processing epoch 01:  86%|████████▌ | 4150/4850 [19:54<03:30,  3.32it/s]

Epoch: 2, Loss: 2.0010082721710205


Processing epoch 01:  86%|████████▌ | 4151/4850 [19:54<03:28,  3.35it/s]

Epoch: 2, Loss: 1.6664376258850098


Processing epoch 01:  86%|████████▌ | 4152/4850 [19:54<03:26,  3.38it/s]

Epoch: 2, Loss: 2.414201021194458


Processing epoch 01:  86%|████████▌ | 4153/4850 [19:55<03:28,  3.34it/s]

Epoch: 2, Loss: 2.3484466075897217


Processing epoch 01:  86%|████████▌ | 4154/4850 [19:55<03:25,  3.39it/s]

Epoch: 2, Loss: 2.323296546936035


Processing epoch 01:  86%|████████▌ | 4155/4850 [19:55<03:22,  3.43it/s]

Epoch: 2, Loss: 2.508863687515259


Processing epoch 01:  86%|████████▌ | 4156/4850 [19:56<03:21,  3.45it/s]

Epoch: 2, Loss: 1.9974021911621094


Processing epoch 01:  86%|████████▌ | 4157/4850 [19:56<03:18,  3.49it/s]

Epoch: 2, Loss: 4.220335006713867


Processing epoch 01:  86%|████████▌ | 4158/4850 [19:56<03:16,  3.51it/s]

Epoch: 2, Loss: 3.548511028289795


Processing epoch 01:  86%|████████▌ | 4159/4850 [19:56<03:16,  3.52it/s]

Epoch: 2, Loss: 2.4047369956970215


Processing epoch 01:  86%|████████▌ | 4160/4850 [19:57<03:16,  3.52it/s]

Epoch: 2, Loss: 2.587775707244873


Processing epoch 01:  86%|████████▌ | 4161/4850 [19:57<03:17,  3.49it/s]

Epoch: 2, Loss: 2.624708414077759


Processing epoch 01:  86%|████████▌ | 4162/4850 [19:57<03:15,  3.52it/s]

Epoch: 2, Loss: 3.5072708129882812


Processing epoch 01:  86%|████████▌ | 4163/4850 [19:58<03:14,  3.53it/s]

Epoch: 2, Loss: 2.5299034118652344


Processing epoch 01:  86%|████████▌ | 4164/4850 [19:58<03:14,  3.53it/s]

Epoch: 2, Loss: 3.0256032943725586


Processing epoch 01:  86%|████████▌ | 4165/4850 [19:58<03:15,  3.50it/s]

Epoch: 2, Loss: 3.007664203643799


Processing epoch 01:  86%|████████▌ | 4166/4850 [19:58<03:14,  3.51it/s]

Epoch: 2, Loss: 2.5758399963378906


Processing epoch 01:  86%|████████▌ | 4167/4850 [19:59<03:16,  3.48it/s]

Epoch: 2, Loss: 2.2158966064453125


Processing epoch 01:  86%|████████▌ | 4168/4850 [19:59<03:15,  3.49it/s]

Epoch: 2, Loss: 2.3047056198120117


Processing epoch 01:  86%|████████▌ | 4169/4850 [19:59<03:14,  3.50it/s]

Epoch: 2, Loss: 2.0505456924438477


Processing epoch 01:  86%|████████▌ | 4170/4850 [20:00<03:13,  3.52it/s]

Epoch: 2, Loss: 2.66973876953125


Processing epoch 01:  86%|████████▌ | 4171/4850 [20:00<03:12,  3.53it/s]

Epoch: 2, Loss: 2.772671699523926


Processing epoch 01:  86%|████████▌ | 4172/4850 [20:00<03:13,  3.50it/s]

Epoch: 2, Loss: 1.7894799709320068


Processing epoch 01:  86%|████████▌ | 4173/4850 [20:00<03:13,  3.50it/s]

Epoch: 2, Loss: 2.2858643531799316


Processing epoch 01:  86%|████████▌ | 4174/4850 [20:01<03:13,  3.50it/s]

Epoch: 2, Loss: 2.216761350631714


Processing epoch 01:  86%|████████▌ | 4175/4850 [20:01<03:11,  3.52it/s]

Epoch: 2, Loss: 2.8025898933410645


Processing epoch 01:  86%|████████▌ | 4176/4850 [20:01<03:11,  3.51it/s]

Epoch: 2, Loss: 2.5379786491394043


Processing epoch 01:  86%|████████▌ | 4177/4850 [20:02<03:11,  3.51it/s]

Epoch: 2, Loss: 2.393606662750244


Processing epoch 01:  86%|████████▌ | 4178/4850 [20:02<03:11,  3.52it/s]

Epoch: 2, Loss: 2.2094438076019287


Processing epoch 01:  86%|████████▌ | 4179/4850 [20:02<03:11,  3.50it/s]

Epoch: 2, Loss: 2.4049696922302246


Processing epoch 01:  86%|████████▌ | 4180/4850 [20:02<03:11,  3.49it/s]

Epoch: 2, Loss: 2.549198627471924


Processing epoch 01:  86%|████████▌ | 4181/4850 [20:03<03:12,  3.48it/s]

Epoch: 2, Loss: 2.3794217109680176


Processing epoch 01:  86%|████████▌ | 4182/4850 [20:03<03:10,  3.51it/s]

Epoch: 2, Loss: 2.9056925773620605


Processing epoch 01:  86%|████████▌ | 4183/4850 [20:03<03:13,  3.45it/s]

Epoch: 2, Loss: 2.3602538108825684


Processing epoch 01:  86%|████████▋ | 4184/4850 [20:04<03:12,  3.47it/s]

Epoch: 2, Loss: 2.334820032119751


Processing epoch 01:  86%|████████▋ | 4185/4850 [20:04<03:10,  3.48it/s]

Epoch: 2, Loss: 2.2942776679992676


Processing epoch 01:  86%|████████▋ | 4186/4850 [20:04<03:11,  3.47it/s]

Epoch: 2, Loss: 2.0980279445648193


Processing epoch 01:  86%|████████▋ | 4187/4850 [20:05<03:10,  3.49it/s]

Epoch: 2, Loss: 2.5442867279052734


Processing epoch 01:  86%|████████▋ | 4188/4850 [20:05<03:09,  3.49it/s]

Epoch: 2, Loss: 3.1294379234313965


Processing epoch 01:  86%|████████▋ | 4189/4850 [20:05<03:12,  3.43it/s]

Epoch: 2, Loss: 2.4689016342163086


Processing epoch 01:  86%|████████▋ | 4190/4850 [20:05<03:14,  3.39it/s]

Epoch: 2, Loss: 2.466917037963867


Processing epoch 01:  86%|████████▋ | 4191/4850 [20:06<03:15,  3.37it/s]

Epoch: 2, Loss: 2.319133758544922


Processing epoch 01:  86%|████████▋ | 4192/4850 [20:06<03:17,  3.33it/s]

Epoch: 2, Loss: 1.9763782024383545


Processing epoch 01:  86%|████████▋ | 4193/4850 [20:06<03:16,  3.34it/s]

Epoch: 2, Loss: 2.6240711212158203


Processing epoch 01:  86%|████████▋ | 4194/4850 [20:07<03:15,  3.35it/s]

Epoch: 2, Loss: 2.525468349456787


Processing epoch 01:  86%|████████▋ | 4195/4850 [20:07<03:15,  3.36it/s]

Epoch: 2, Loss: 2.422928810119629


Processing epoch 01:  87%|████████▋ | 4196/4850 [20:07<03:15,  3.34it/s]

Epoch: 2, Loss: 2.547560691833496


Processing epoch 01:  87%|████████▋ | 4197/4850 [20:07<03:11,  3.40it/s]

Epoch: 2, Loss: 2.193793296813965


Processing epoch 01:  87%|████████▋ | 4198/4850 [20:08<03:14,  3.35it/s]

Epoch: 2, Loss: 2.3404417037963867


Processing epoch 01:  87%|████████▋ | 4199/4850 [20:08<03:15,  3.32it/s]

Epoch: 2, Loss: 2.3934926986694336


Processing epoch 01:  87%|████████▋ | 4200/4850 [20:08<03:16,  3.31it/s]

Epoch: 2, Loss: 2.2182226181030273


Processing epoch 01:  87%|████████▋ | 4201/4850 [20:09<03:17,  3.29it/s]

Epoch: 2, Loss: 2.722048044204712


Processing epoch 01:  87%|████████▋ | 4202/4850 [20:09<03:13,  3.35it/s]

Epoch: 2, Loss: 2.1096606254577637


Processing epoch 01:  87%|████████▋ | 4203/4850 [20:09<03:13,  3.35it/s]

Epoch: 2, Loss: 2.574014663696289


Processing epoch 01:  87%|████████▋ | 4204/4850 [20:10<03:16,  3.29it/s]

Epoch: 2, Loss: 2.0853214263916016


Processing epoch 01:  87%|████████▋ | 4205/4850 [20:10<03:17,  3.27it/s]

Epoch: 2, Loss: 2.073961019515991


Processing epoch 01:  87%|████████▋ | 4206/4850 [20:10<03:12,  3.34it/s]

Epoch: 2, Loss: 2.463268280029297


Processing epoch 01:  87%|████████▋ | 4207/4850 [20:10<03:09,  3.39it/s]

Epoch: 2, Loss: 2.56778883934021


Processing epoch 01:  87%|████████▋ | 4208/4850 [20:11<03:09,  3.39it/s]

Epoch: 2, Loss: 3.1995553970336914


Processing epoch 01:  87%|████████▋ | 4209/4850 [20:11<03:07,  3.43it/s]

Epoch: 2, Loss: 2.386697769165039


Processing epoch 01:  87%|████████▋ | 4210/4850 [20:11<03:05,  3.46it/s]

Epoch: 2, Loss: 2.516496181488037


Processing epoch 01:  87%|████████▋ | 4211/4850 [20:12<03:03,  3.48it/s]

Epoch: 2, Loss: 2.1870956420898438


Processing epoch 01:  87%|████████▋ | 4212/4850 [20:12<03:03,  3.49it/s]

Epoch: 2, Loss: 2.742201805114746


Processing epoch 01:  87%|████████▋ | 4213/4850 [20:12<03:02,  3.50it/s]

Epoch: 2, Loss: 2.7522377967834473


Processing epoch 01:  87%|████████▋ | 4214/4850 [20:12<03:01,  3.51it/s]

Epoch: 2, Loss: 2.146315813064575


Processing epoch 01:  87%|████████▋ | 4215/4850 [20:13<03:00,  3.51it/s]

Epoch: 2, Loss: 3.6086554527282715


Processing epoch 01:  87%|████████▋ | 4216/4850 [20:13<02:59,  3.52it/s]

Epoch: 2, Loss: 2.337984085083008


Processing epoch 01:  87%|████████▋ | 4217/4850 [20:13<03:00,  3.52it/s]

Epoch: 2, Loss: 2.90236496925354


Processing epoch 01:  87%|████████▋ | 4218/4850 [20:14<03:00,  3.50it/s]

Epoch: 2, Loss: 2.3051939010620117


Processing epoch 01:  87%|████████▋ | 4219/4850 [20:14<03:01,  3.48it/s]

Epoch: 2, Loss: 2.464425563812256


Processing epoch 01:  87%|████████▋ | 4220/4850 [20:14<03:01,  3.48it/s]

Epoch: 2, Loss: 2.7872278690338135


Processing epoch 01:  87%|████████▋ | 4221/4850 [20:14<03:00,  3.48it/s]

Epoch: 2, Loss: 2.4629645347595215


Processing epoch 01:  87%|████████▋ | 4222/4850 [20:15<03:00,  3.49it/s]

Epoch: 2, Loss: 2.431426525115967


Processing epoch 01:  87%|████████▋ | 4223/4850 [20:15<02:58,  3.52it/s]

Epoch: 2, Loss: 3.0542287826538086


Processing epoch 01:  87%|████████▋ | 4224/4850 [20:15<02:57,  3.52it/s]

Epoch: 2, Loss: 2.412564516067505


Processing epoch 01:  87%|████████▋ | 4225/4850 [20:16<02:58,  3.51it/s]

Epoch: 2, Loss: 2.3560433387756348


Processing epoch 01:  87%|████████▋ | 4226/4850 [20:16<02:57,  3.51it/s]

Epoch: 2, Loss: 2.655259609222412


Processing epoch 01:  87%|████████▋ | 4227/4850 [20:16<02:57,  3.51it/s]

Epoch: 2, Loss: 2.031048536300659


Processing epoch 01:  87%|████████▋ | 4228/4850 [20:16<02:58,  3.48it/s]

Epoch: 2, Loss: 2.4258337020874023


Processing epoch 01:  87%|████████▋ | 4229/4850 [20:17<02:57,  3.50it/s]

Epoch: 2, Loss: 2.741128921508789


Processing epoch 01:  87%|████████▋ | 4230/4850 [20:17<02:57,  3.50it/s]

Epoch: 2, Loss: 2.474727153778076


Processing epoch 01:  87%|████████▋ | 4231/4850 [20:17<02:56,  3.51it/s]

Epoch: 2, Loss: 2.522732734680176


Processing epoch 01:  87%|████████▋ | 4232/4850 [20:18<02:56,  3.51it/s]

Epoch: 2, Loss: 2.3218955993652344


Processing epoch 01:  87%|████████▋ | 4233/4850 [20:18<02:55,  3.52it/s]

Epoch: 2, Loss: 2.8297853469848633


Processing epoch 01:  87%|████████▋ | 4234/4850 [20:18<02:55,  3.52it/s]

Epoch: 2, Loss: 1.8424973487854004


Processing epoch 01:  87%|████████▋ | 4235/4850 [20:18<02:54,  3.53it/s]

Epoch: 2, Loss: 3.813286304473877


Processing epoch 01:  87%|████████▋ | 4236/4850 [20:19<02:55,  3.49it/s]

Epoch: 2, Loss: 2.4432339668273926


Processing epoch 01:  87%|████████▋ | 4237/4850 [20:19<02:58,  3.44it/s]

Epoch: 2, Loss: 2.9557008743286133


Processing epoch 01:  87%|████████▋ | 4238/4850 [20:19<02:57,  3.45it/s]

Epoch: 2, Loss: 2.1682052612304688


Processing epoch 01:  87%|████████▋ | 4239/4850 [20:20<02:56,  3.45it/s]

Epoch: 2, Loss: 2.3215410709381104


Processing epoch 01:  87%|████████▋ | 4240/4850 [20:20<02:55,  3.48it/s]

Epoch: 2, Loss: 2.482058048248291


Processing epoch 01:  87%|████████▋ | 4241/4850 [20:20<02:57,  3.44it/s]

Epoch: 2, Loss: 2.648325204849243


Processing epoch 01:  87%|████████▋ | 4242/4850 [20:21<02:58,  3.41it/s]

Epoch: 2, Loss: 2.6011714935302734


Processing epoch 01:  87%|████████▋ | 4243/4850 [20:21<02:59,  3.39it/s]

Epoch: 2, Loss: 2.1597697734832764


Processing epoch 01:  88%|████████▊ | 4244/4850 [20:21<03:01,  3.34it/s]

Epoch: 2, Loss: 2.217888116836548


Processing epoch 01:  88%|████████▊ | 4245/4850 [20:21<03:00,  3.35it/s]

Epoch: 2, Loss: 2.425166606903076


Processing epoch 01:  88%|████████▊ | 4246/4850 [20:22<02:56,  3.42it/s]

Epoch: 2, Loss: 2.3447415828704834


Processing epoch 01:  88%|████████▊ | 4247/4850 [20:22<02:54,  3.46it/s]

Epoch: 2, Loss: 2.4045588970184326


Processing epoch 01:  88%|████████▊ | 4248/4850 [20:22<02:54,  3.44it/s]

Epoch: 2, Loss: 2.869443655014038


Processing epoch 01:  88%|████████▊ | 4249/4850 [20:23<02:53,  3.46it/s]

Epoch: 2, Loss: 2.800060272216797


Processing epoch 01:  88%|████████▊ | 4250/4850 [20:23<02:52,  3.48it/s]

Epoch: 2, Loss: 1.9457447528839111


Processing epoch 01:  88%|████████▊ | 4251/4850 [20:23<02:53,  3.45it/s]

Epoch: 2, Loss: 2.3789072036743164


Processing epoch 01:  88%|████████▊ | 4252/4850 [20:23<02:54,  3.43it/s]

Epoch: 2, Loss: 2.3540825843811035


Processing epoch 01:  88%|████████▊ | 4253/4850 [20:24<02:53,  3.44it/s]

Epoch: 2, Loss: 2.5536985397338867


Processing epoch 01:  88%|████████▊ | 4254/4850 [20:24<02:52,  3.46it/s]

Epoch: 2, Loss: 2.37589168548584


Processing epoch 01:  88%|████████▊ | 4255/4850 [20:24<02:50,  3.48it/s]

Epoch: 2, Loss: 2.7007601261138916


Processing epoch 01:  88%|████████▊ | 4256/4850 [20:25<02:53,  3.43it/s]

Epoch: 2, Loss: 1.9378981590270996


Processing epoch 01:  88%|████████▊ | 4257/4850 [20:25<02:54,  3.40it/s]

Epoch: 2, Loss: 1.9520759582519531


Processing epoch 01:  88%|████████▊ | 4258/4850 [20:25<02:52,  3.44it/s]

Epoch: 2, Loss: 2.207719087600708


Processing epoch 01:  88%|████████▊ | 4259/4850 [20:25<02:50,  3.47it/s]

Epoch: 2, Loss: 2.9234097003936768


Processing epoch 01:  88%|████████▊ | 4260/4850 [20:26<02:49,  3.48it/s]

Epoch: 2, Loss: 2.414438247680664


Processing epoch 01:  88%|████████▊ | 4261/4850 [20:26<02:48,  3.50it/s]

Epoch: 2, Loss: 2.972445011138916


Processing epoch 01:  88%|████████▊ | 4262/4850 [20:26<02:48,  3.49it/s]

Epoch: 2, Loss: 2.744110345840454


Processing epoch 01:  88%|████████▊ | 4263/4850 [20:27<02:48,  3.49it/s]

Epoch: 2, Loss: 2.05605411529541


Processing epoch 01:  88%|████████▊ | 4264/4850 [20:27<02:48,  3.48it/s]

Epoch: 2, Loss: 2.1454949378967285


Processing epoch 01:  88%|████████▊ | 4265/4850 [20:27<02:47,  3.49it/s]

Epoch: 2, Loss: 2.6430373191833496


Processing epoch 01:  88%|████████▊ | 4266/4850 [20:27<02:47,  3.49it/s]

Epoch: 2, Loss: 2.3347320556640625


Processing epoch 01:  88%|████████▊ | 4267/4850 [20:28<02:46,  3.51it/s]

Epoch: 2, Loss: 2.605651378631592


Processing epoch 01:  88%|████████▊ | 4268/4850 [20:28<02:45,  3.52it/s]

Epoch: 2, Loss: 2.2365145683288574


Processing epoch 01:  88%|████████▊ | 4269/4850 [20:28<02:44,  3.54it/s]

Epoch: 2, Loss: 2.550917148590088


Processing epoch 01:  88%|████████▊ | 4270/4850 [20:29<02:46,  3.48it/s]

Epoch: 2, Loss: 2.742441415786743


Processing epoch 01:  88%|████████▊ | 4271/4850 [20:29<02:45,  3.51it/s]

Epoch: 2, Loss: 2.5426480770111084


Processing epoch 01:  88%|████████▊ | 4272/4850 [20:29<02:44,  3.51it/s]

Epoch: 2, Loss: 2.798668146133423


Processing epoch 01:  88%|████████▊ | 4273/4850 [20:29<02:43,  3.53it/s]

Epoch: 2, Loss: 1.9969103336334229


Processing epoch 01:  88%|████████▊ | 4274/4850 [20:30<02:42,  3.54it/s]

Epoch: 2, Loss: 2.0081663131713867


Processing epoch 01:  88%|████████▊ | 4275/4850 [20:30<02:42,  3.54it/s]

Epoch: 2, Loss: 2.3897008895874023


Processing epoch 01:  88%|████████▊ | 4276/4850 [20:30<02:42,  3.52it/s]

Epoch: 2, Loss: 2.5913007259368896


Processing epoch 01:  88%|████████▊ | 4277/4850 [20:31<02:43,  3.50it/s]

Epoch: 2, Loss: 2.2422103881835938


Processing epoch 01:  88%|████████▊ | 4278/4850 [20:31<02:42,  3.52it/s]

Epoch: 2, Loss: 2.589639663696289


Processing epoch 01:  88%|████████▊ | 4279/4850 [20:31<02:42,  3.52it/s]

Epoch: 2, Loss: 3.1674656867980957


Processing epoch 01:  88%|████████▊ | 4280/4850 [20:31<02:42,  3.51it/s]

Epoch: 2, Loss: 2.8792660236358643


Processing epoch 01:  88%|████████▊ | 4281/4850 [20:32<02:43,  3.48it/s]

Epoch: 2, Loss: 2.0703141689300537


Processing epoch 01:  88%|████████▊ | 4282/4850 [20:32<02:42,  3.49it/s]

Epoch: 2, Loss: 2.5794200897216797


Processing epoch 01:  88%|████████▊ | 4283/4850 [20:32<02:41,  3.52it/s]

Epoch: 2, Loss: 2.9354474544525146


Processing epoch 01:  88%|████████▊ | 4284/4850 [20:33<02:40,  3.52it/s]

Epoch: 2, Loss: 2.374518871307373


Processing epoch 01:  88%|████████▊ | 4285/4850 [20:33<02:40,  3.52it/s]

Epoch: 2, Loss: 2.1963486671447754


Processing epoch 01:  88%|████████▊ | 4286/4850 [20:33<02:38,  3.55it/s]

Epoch: 2, Loss: 3.715701103210449


Processing epoch 01:  88%|████████▊ | 4287/4850 [20:33<02:39,  3.53it/s]

Epoch: 2, Loss: 2.4168756008148193


Processing epoch 01:  88%|████████▊ | 4288/4850 [20:34<02:39,  3.52it/s]

Epoch: 2, Loss: 2.327404499053955


Processing epoch 01:  88%|████████▊ | 4289/4850 [20:34<02:40,  3.50it/s]

Epoch: 2, Loss: 2.2693986892700195


Processing epoch 01:  88%|████████▊ | 4290/4850 [20:34<02:40,  3.49it/s]

Epoch: 2, Loss: 2.10166335105896


Processing epoch 01:  88%|████████▊ | 4291/4850 [20:35<02:40,  3.49it/s]

Epoch: 2, Loss: 2.465895175933838


Processing epoch 01:  88%|████████▊ | 4292/4850 [20:35<02:41,  3.46it/s]

Epoch: 2, Loss: 2.090369701385498


Processing epoch 01:  89%|████████▊ | 4293/4850 [20:35<02:40,  3.46it/s]

Epoch: 2, Loss: 2.486515760421753


Processing epoch 01:  89%|████████▊ | 4294/4850 [20:35<02:42,  3.43it/s]

Epoch: 2, Loss: 2.2887887954711914


Processing epoch 01:  89%|████████▊ | 4295/4850 [20:36<02:41,  3.44it/s]

Epoch: 2, Loss: 3.0596680641174316


Processing epoch 01:  89%|████████▊ | 4296/4850 [20:36<02:39,  3.47it/s]

Epoch: 2, Loss: 2.2674217224121094


Processing epoch 01:  89%|████████▊ | 4297/4850 [20:36<02:40,  3.45it/s]

Epoch: 2, Loss: 2.441988945007324


Processing epoch 01:  89%|████████▊ | 4298/4850 [20:37<02:41,  3.42it/s]

Epoch: 2, Loss: 2.1081607341766357


Processing epoch 01:  89%|████████▊ | 4299/4850 [20:37<02:42,  3.39it/s]

Epoch: 2, Loss: 1.909690499305725


Processing epoch 01:  89%|████████▊ | 4300/4850 [20:37<02:47,  3.28it/s]

Epoch: 2, Loss: 2.3550891876220703


Processing epoch 01:  89%|████████▊ | 4301/4850 [20:38<02:45,  3.32it/s]

Epoch: 2, Loss: 2.19596004486084


Processing epoch 01:  89%|████████▊ | 4302/4850 [20:38<02:42,  3.38it/s]

Epoch: 2, Loss: 2.2741270065307617


Processing epoch 01:  89%|████████▊ | 4303/4850 [20:38<02:42,  3.37it/s]

Epoch: 2, Loss: 2.8138344287872314


Processing epoch 01:  89%|████████▊ | 4304/4850 [20:38<02:43,  3.34it/s]

Epoch: 2, Loss: 1.6935553550720215


Processing epoch 01:  89%|████████▉ | 4305/4850 [20:39<02:44,  3.32it/s]

Epoch: 2, Loss: 2.762010335922241


Processing epoch 01:  89%|████████▉ | 4306/4850 [20:39<02:43,  3.32it/s]

Epoch: 2, Loss: 1.6511664390563965


Processing epoch 01:  89%|████████▉ | 4307/4850 [20:39<02:42,  3.34it/s]

Epoch: 2, Loss: 2.2048163414001465


Processing epoch 01:  89%|████████▉ | 4308/4850 [20:40<02:39,  3.41it/s]

Epoch: 2, Loss: 3.081913948059082


Processing epoch 01:  89%|████████▉ | 4309/4850 [20:40<02:37,  3.44it/s]

Epoch: 2, Loss: 2.194103956222534


Processing epoch 01:  89%|████████▉ | 4310/4850 [20:40<02:36,  3.45it/s]

Epoch: 2, Loss: 2.220564365386963


Processing epoch 01:  89%|████████▉ | 4311/4850 [20:40<02:34,  3.49it/s]

Epoch: 2, Loss: 2.1079227924346924


Processing epoch 01:  89%|████████▉ | 4312/4850 [20:41<02:33,  3.50it/s]

Epoch: 2, Loss: 2.651201009750366


Processing epoch 01:  89%|████████▉ | 4313/4850 [20:41<02:33,  3.49it/s]

Epoch: 2, Loss: 2.716099977493286


Processing epoch 01:  89%|████████▉ | 4314/4850 [20:41<02:33,  3.49it/s]

Epoch: 2, Loss: 2.2627804279327393


Processing epoch 01:  89%|████████▉ | 4315/4850 [20:42<02:33,  3.49it/s]

Epoch: 2, Loss: 2.266040325164795


Processing epoch 01:  89%|████████▉ | 4316/4850 [20:42<02:33,  3.48it/s]

Epoch: 2, Loss: 2.4502310752868652


Processing epoch 01:  89%|████████▉ | 4317/4850 [20:42<02:34,  3.44it/s]

Epoch: 2, Loss: 2.3999390602111816


Processing epoch 01:  89%|████████▉ | 4318/4850 [20:42<02:32,  3.49it/s]

Epoch: 2, Loss: 2.8714687824249268


Processing epoch 01:  89%|████████▉ | 4319/4850 [20:43<02:31,  3.50it/s]

Epoch: 2, Loss: 2.3678078651428223


Processing epoch 01:  89%|████████▉ | 4320/4850 [20:43<02:31,  3.51it/s]

Epoch: 2, Loss: 2.339993476867676


Processing epoch 01:  89%|████████▉ | 4321/4850 [20:43<02:33,  3.45it/s]

Epoch: 2, Loss: 2.3026957511901855


Processing epoch 01:  89%|████████▉ | 4322/4850 [20:44<02:31,  3.48it/s]

Epoch: 2, Loss: 2.329892873764038


Processing epoch 01:  89%|████████▉ | 4323/4850 [20:44<02:30,  3.50it/s]

Epoch: 2, Loss: 2.4072365760803223


Processing epoch 01:  89%|████████▉ | 4324/4850 [20:44<02:29,  3.51it/s]

Epoch: 2, Loss: 2.4326539039611816


Processing epoch 01:  89%|████████▉ | 4325/4850 [20:44<02:29,  3.52it/s]

Epoch: 2, Loss: 2.264174699783325


Processing epoch 01:  89%|████████▉ | 4326/4850 [20:45<02:29,  3.52it/s]

Epoch: 2, Loss: 2.335958957672119


Processing epoch 01:  89%|████████▉ | 4327/4850 [20:45<02:27,  3.54it/s]

Epoch: 2, Loss: 2.713390350341797


Processing epoch 01:  89%|████████▉ | 4328/4850 [20:45<02:27,  3.54it/s]

Epoch: 2, Loss: 2.387216567993164


Processing epoch 01:  89%|████████▉ | 4329/4850 [20:46<02:26,  3.55it/s]

Epoch: 2, Loss: 2.192736864089966


Processing epoch 01:  89%|████████▉ | 4330/4850 [20:46<02:30,  3.45it/s]

Epoch: 2, Loss: 1.9134026765823364


Processing epoch 01:  89%|████████▉ | 4331/4850 [20:46<02:29,  3.46it/s]

Epoch: 2, Loss: 2.1612749099731445


Processing epoch 01:  89%|████████▉ | 4332/4850 [20:46<02:31,  3.42it/s]

Epoch: 2, Loss: 2.3513078689575195


Processing epoch 01:  89%|████████▉ | 4333/4850 [20:47<02:30,  3.44it/s]

Epoch: 2, Loss: 2.2092583179473877


Processing epoch 01:  89%|████████▉ | 4334/4850 [20:47<02:28,  3.47it/s]

Epoch: 2, Loss: 2.4891343116760254


Processing epoch 01:  89%|████████▉ | 4335/4850 [20:47<02:27,  3.48it/s]

Epoch: 2, Loss: 2.0793089866638184


Processing epoch 01:  89%|████████▉ | 4336/4850 [20:48<02:27,  3.49it/s]

Epoch: 2, Loss: 2.452543258666992


Processing epoch 01:  89%|████████▉ | 4337/4850 [20:48<02:26,  3.50it/s]

Epoch: 2, Loss: 2.5695347785949707


Processing epoch 01:  89%|████████▉ | 4338/4850 [20:48<02:26,  3.50it/s]

Epoch: 2, Loss: 2.0462610721588135


Processing epoch 01:  89%|████████▉ | 4339/4850 [20:48<02:25,  3.50it/s]

Epoch: 2, Loss: 2.2536072731018066


Processing epoch 01:  89%|████████▉ | 4340/4850 [20:49<02:26,  3.49it/s]

Epoch: 2, Loss: 2.3294034004211426


Processing epoch 01:  90%|████████▉ | 4341/4850 [20:49<02:25,  3.49it/s]

Epoch: 2, Loss: 2.1027820110321045


Processing epoch 01:  90%|████████▉ | 4342/4850 [20:49<02:25,  3.49it/s]

Epoch: 2, Loss: 2.471169948577881


Processing epoch 01:  90%|████████▉ | 4343/4850 [20:50<02:27,  3.43it/s]

Epoch: 2, Loss: 2.4510982036590576


Processing epoch 01:  90%|████████▉ | 4344/4850 [20:50<02:29,  3.39it/s]

Epoch: 2, Loss: 2.5117530822753906


Processing epoch 01:  90%|████████▉ | 4345/4850 [20:50<02:29,  3.37it/s]

Epoch: 2, Loss: 3.010195255279541


Processing epoch 01:  90%|████████▉ | 4346/4850 [20:51<02:28,  3.40it/s]

Epoch: 2, Loss: 2.2831978797912598


Processing epoch 01:  90%|████████▉ | 4347/4850 [20:51<02:28,  3.40it/s]

Epoch: 2, Loss: 2.607046604156494


Processing epoch 01:  90%|████████▉ | 4348/4850 [20:51<02:25,  3.46it/s]

Epoch: 2, Loss: 2.920318126678467


Processing epoch 01:  90%|████████▉ | 4349/4850 [20:51<02:23,  3.48it/s]

Epoch: 2, Loss: 2.739685535430908


Processing epoch 01:  90%|████████▉ | 4350/4850 [20:52<02:28,  3.37it/s]

Epoch: 2, Loss: 2.322385311126709


Processing epoch 01:  90%|████████▉ | 4351/4850 [20:52<02:30,  3.32it/s]

Epoch: 2, Loss: 2.4863038063049316


Processing epoch 01:  90%|████████▉ | 4352/4850 [20:52<02:27,  3.39it/s]

Epoch: 2, Loss: 2.50825572013855


Processing epoch 01:  90%|████████▉ | 4353/4850 [20:53<02:27,  3.37it/s]

Epoch: 2, Loss: 2.4304535388946533


Processing epoch 01:  90%|████████▉ | 4354/4850 [20:53<02:25,  3.40it/s]

Epoch: 2, Loss: 2.2851972579956055


Processing epoch 01:  90%|████████▉ | 4355/4850 [20:53<02:28,  3.34it/s]

Epoch: 2, Loss: 2.2189810276031494


Processing epoch 01:  90%|████████▉ | 4356/4850 [20:54<02:28,  3.34it/s]

Epoch: 2, Loss: 2.2564315795898438


Processing epoch 01:  90%|████████▉ | 4357/4850 [20:54<02:27,  3.35it/s]

Epoch: 2, Loss: 2.2245354652404785


Processing epoch 01:  90%|████████▉ | 4358/4850 [20:54<02:26,  3.36it/s]

Epoch: 2, Loss: 2.4689605236053467


Processing epoch 01:  90%|████████▉ | 4359/4850 [20:54<02:27,  3.33it/s]

Epoch: 2, Loss: 2.570850372314453


Processing epoch 01:  90%|████████▉ | 4360/4850 [20:55<02:25,  3.38it/s]

Epoch: 2, Loss: 2.452636957168579


Processing epoch 01:  90%|████████▉ | 4361/4850 [20:55<02:22,  3.44it/s]

Epoch: 2, Loss: 2.6463801860809326


Processing epoch 01:  90%|████████▉ | 4362/4850 [20:55<02:21,  3.46it/s]

Epoch: 2, Loss: 1.9729573726654053


Processing epoch 01:  90%|████████▉ | 4363/4850 [20:56<02:19,  3.48it/s]

Epoch: 2, Loss: 2.6807467937469482


Processing epoch 01:  90%|████████▉ | 4364/4850 [20:56<02:21,  3.44it/s]

Epoch: 2, Loss: 2.1141762733459473


Processing epoch 01:  90%|█████████ | 4365/4850 [20:56<02:20,  3.46it/s]

Epoch: 2, Loss: 2.7778477668762207


Processing epoch 01:  90%|█████████ | 4366/4850 [20:56<02:19,  3.48it/s]

Epoch: 2, Loss: 2.5383896827697754


Processing epoch 01:  90%|█████████ | 4367/4850 [20:57<02:18,  3.49it/s]

Epoch: 2, Loss: 2.2900009155273438


Processing epoch 01:  90%|█████████ | 4368/4850 [20:57<02:20,  3.43it/s]

Epoch: 2, Loss: 2.8417835235595703


Processing epoch 01:  90%|█████████ | 4369/4850 [20:57<02:19,  3.45it/s]

Epoch: 2, Loss: 2.389967441558838


Processing epoch 01:  90%|█████████ | 4370/4850 [20:58<02:18,  3.46it/s]

Epoch: 2, Loss: 2.089890718460083


Processing epoch 01:  90%|█████████ | 4371/4850 [20:58<02:17,  3.48it/s]

Epoch: 2, Loss: 2.43399715423584


Processing epoch 01:  90%|█████████ | 4372/4850 [20:58<02:17,  3.49it/s]

Epoch: 2, Loss: 2.1523568630218506


Processing epoch 01:  90%|█████████ | 4373/4850 [20:58<02:15,  3.51it/s]

Epoch: 2, Loss: 2.942830801010132


Processing epoch 01:  90%|█████████ | 4374/4850 [20:59<02:15,  3.51it/s]

Epoch: 2, Loss: 2.6169018745422363


Processing epoch 01:  90%|█████████ | 4375/4850 [20:59<02:15,  3.50it/s]

Epoch: 2, Loss: 2.017277717590332


Processing epoch 01:  90%|█████████ | 4376/4850 [20:59<02:15,  3.51it/s]

Epoch: 2, Loss: 2.166731357574463


Processing epoch 01:  90%|█████████ | 4377/4850 [21:00<02:14,  3.52it/s]

Epoch: 2, Loss: 2.5001420974731445


Processing epoch 01:  90%|█████████ | 4378/4850 [21:00<02:14,  3.52it/s]

Epoch: 2, Loss: 2.2150659561157227


Processing epoch 01:  90%|█████████ | 4379/4850 [21:00<02:15,  3.48it/s]

Epoch: 2, Loss: 2.496709108352661


Processing epoch 01:  90%|█████████ | 4380/4850 [21:00<02:14,  3.49it/s]

Epoch: 2, Loss: 1.8949089050292969


Processing epoch 01:  90%|█████████ | 4381/4850 [21:01<02:14,  3.49it/s]

Epoch: 2, Loss: 2.2547905445098877


Processing epoch 01:  90%|█████████ | 4382/4850 [21:01<02:13,  3.50it/s]

Epoch: 2, Loss: 2.3464431762695312


Processing epoch 01:  90%|█████████ | 4383/4850 [21:01<02:13,  3.49it/s]

Epoch: 2, Loss: 2.311211109161377


Processing epoch 01:  90%|█████████ | 4384/4850 [21:02<02:13,  3.50it/s]

Epoch: 2, Loss: 2.504027843475342


Processing epoch 01:  90%|█████████ | 4385/4850 [21:02<02:13,  3.50it/s]

Epoch: 2, Loss: 2.100996255874634


Processing epoch 01:  90%|█████████ | 4386/4850 [21:02<02:12,  3.51it/s]

Epoch: 2, Loss: 3.1479570865631104


Processing epoch 01:  90%|█████████ | 4387/4850 [21:02<02:11,  3.51it/s]

Epoch: 2, Loss: 2.416555166244507


Processing epoch 01:  90%|█████████ | 4388/4850 [21:03<02:12,  3.50it/s]

Epoch: 2, Loss: 1.9094898700714111


Processing epoch 01:  90%|█████████ | 4389/4850 [21:03<02:11,  3.51it/s]

Epoch: 2, Loss: 2.2779407501220703


Processing epoch 01:  91%|█████████ | 4390/4850 [21:03<02:12,  3.48it/s]

Epoch: 2, Loss: 2.593773365020752


Processing epoch 01:  91%|█████████ | 4391/4850 [21:04<02:11,  3.50it/s]

Epoch: 2, Loss: 2.3201558589935303


Processing epoch 01:  91%|█████████ | 4392/4850 [21:04<02:11,  3.49it/s]

Epoch: 2, Loss: 2.573550224304199


Processing epoch 01:  91%|█████████ | 4393/4850 [21:04<02:10,  3.49it/s]

Epoch: 2, Loss: 2.895648717880249


Processing epoch 01:  91%|█████████ | 4394/4850 [21:04<02:10,  3.49it/s]

Epoch: 2, Loss: 2.5783658027648926


Processing epoch 01:  91%|█████████ | 4395/4850 [21:05<02:13,  3.41it/s]

Epoch: 2, Loss: 2.613079309463501


Processing epoch 01:  91%|█████████ | 4396/4850 [21:05<02:11,  3.45it/s]

Epoch: 2, Loss: 2.06003475189209


Processing epoch 01:  91%|█████████ | 4397/4850 [21:05<02:13,  3.40it/s]

Epoch: 2, Loss: 3.277320384979248


Processing epoch 01:  91%|█████████ | 4398/4850 [21:06<02:11,  3.43it/s]

Epoch: 2, Loss: 2.919114589691162


Processing epoch 01:  91%|█████████ | 4399/4850 [21:06<02:14,  3.36it/s]

Epoch: 2, Loss: 2.6979150772094727


Processing epoch 01:  91%|█████████ | 4400/4850 [21:06<02:14,  3.34it/s]

Epoch: 2, Loss: 2.2993040084838867


Processing epoch 01:  91%|█████████ | 4401/4850 [21:07<02:16,  3.28it/s]

Epoch: 2, Loss: 2.424037218093872


Processing epoch 01:  91%|█████████ | 4402/4850 [21:07<02:16,  3.27it/s]

Epoch: 2, Loss: 2.289451837539673


Processing epoch 01:  91%|█████████ | 4403/4850 [21:07<02:16,  3.28it/s]

Epoch: 2, Loss: 2.7414910793304443


Processing epoch 01:  91%|█████████ | 4404/4850 [21:07<02:13,  3.33it/s]

Epoch: 2, Loss: 2.3644607067108154


Processing epoch 01:  91%|█████████ | 4405/4850 [21:08<02:11,  3.38it/s]

Epoch: 2, Loss: 3.5451107025146484


Processing epoch 01:  91%|█████████ | 4406/4850 [21:08<02:13,  3.31it/s]

Epoch: 2, Loss: 2.244276523590088


Processing epoch 01:  91%|█████████ | 4407/4850 [21:08<02:12,  3.33it/s]

Epoch: 2, Loss: 2.131382465362549


Processing epoch 01:  91%|█████████ | 4408/4850 [21:09<02:15,  3.26it/s]

Epoch: 2, Loss: 2.736233711242676


Processing epoch 01:  91%|█████████ | 4409/4850 [21:09<02:14,  3.29it/s]

Epoch: 2, Loss: 2.0417304039001465


Processing epoch 01:  91%|█████████ | 4410/4850 [21:09<02:15,  3.24it/s]

Epoch: 2, Loss: 2.3171801567077637


Processing epoch 01:  91%|█████████ | 4411/4850 [21:10<02:12,  3.30it/s]

Epoch: 2, Loss: 2.768864393234253


Processing epoch 01:  91%|█████████ | 4412/4850 [21:10<02:10,  3.37it/s]

Epoch: 2, Loss: 2.6192398071289062


Processing epoch 01:  91%|█████████ | 4413/4850 [21:10<02:08,  3.41it/s]

Epoch: 2, Loss: 3.611375570297241


Processing epoch 01:  91%|█████████ | 4414/4850 [21:10<02:06,  3.43it/s]

Epoch: 2, Loss: 2.055150032043457


Processing epoch 01:  91%|█████████ | 4415/4850 [21:11<02:06,  3.44it/s]

Epoch: 2, Loss: 2.405808925628662


Processing epoch 01:  91%|█████████ | 4416/4850 [21:11<02:05,  3.45it/s]

Epoch: 2, Loss: 2.224867343902588


Processing epoch 01:  91%|█████████ | 4417/4850 [21:11<02:05,  3.46it/s]

Epoch: 2, Loss: 2.445246458053589


Processing epoch 01:  91%|█████████ | 4418/4850 [21:12<02:04,  3.47it/s]

Epoch: 2, Loss: 2.401197671890259


Processing epoch 01:  91%|█████████ | 4419/4850 [21:12<02:03,  3.49it/s]

Epoch: 2, Loss: 2.6833372116088867


Processing epoch 01:  91%|█████████ | 4420/4850 [21:12<02:02,  3.50it/s]

Epoch: 2, Loss: 2.6109423637390137


Processing epoch 01:  91%|█████████ | 4421/4850 [21:12<02:02,  3.51it/s]

Epoch: 2, Loss: 1.936930537223816


Processing epoch 01:  91%|█████████ | 4422/4850 [21:13<02:03,  3.47it/s]

Epoch: 2, Loss: 2.5785274505615234


Processing epoch 01:  91%|█████████ | 4423/4850 [21:13<02:02,  3.49it/s]

Epoch: 2, Loss: 2.5505223274230957


Processing epoch 01:  91%|█████████ | 4424/4850 [21:13<02:02,  3.49it/s]

Epoch: 2, Loss: 2.2375357151031494


Processing epoch 01:  91%|█████████ | 4425/4850 [21:14<02:01,  3.49it/s]

Epoch: 2, Loss: 2.298234224319458


Processing epoch 01:  91%|█████████▏| 4426/4850 [21:14<02:01,  3.49it/s]

Epoch: 2, Loss: 2.3925533294677734


Processing epoch 01:  91%|█████████▏| 4427/4850 [21:14<02:00,  3.51it/s]

Epoch: 2, Loss: 2.2666072845458984


Processing epoch 01:  91%|█████████▏| 4428/4850 [21:14<02:00,  3.51it/s]

Epoch: 2, Loss: 2.2690658569335938


Processing epoch 01:  91%|█████████▏| 4429/4850 [21:15<01:59,  3.52it/s]

Epoch: 2, Loss: 2.0220041275024414


Processing epoch 01:  91%|█████████▏| 4430/4850 [21:15<01:58,  3.53it/s]

Epoch: 2, Loss: 2.3418772220611572


Processing epoch 01:  91%|█████████▏| 4431/4850 [21:15<01:59,  3.51it/s]

Epoch: 2, Loss: 2.824648857116699


Processing epoch 01:  91%|█████████▏| 4432/4850 [21:16<01:58,  3.51it/s]

Epoch: 2, Loss: 2.41331148147583


Processing epoch 01:  91%|█████████▏| 4433/4850 [21:16<02:00,  3.47it/s]

Epoch: 2, Loss: 2.632136821746826


Processing epoch 01:  91%|█████████▏| 4434/4850 [21:16<01:59,  3.49it/s]

Epoch: 2, Loss: 3.3050594329833984


Processing epoch 01:  91%|█████████▏| 4435/4850 [21:16<01:58,  3.51it/s]

Epoch: 2, Loss: 2.9962496757507324


Processing epoch 01:  91%|█████████▏| 4436/4850 [21:17<01:57,  3.51it/s]

Epoch: 2, Loss: 2.1879804134368896


Processing epoch 01:  91%|█████████▏| 4437/4850 [21:17<01:57,  3.52it/s]

Epoch: 2, Loss: 2.4704484939575195


Processing epoch 01:  92%|█████████▏| 4438/4850 [21:17<01:56,  3.52it/s]

Epoch: 2, Loss: 2.347641944885254


Processing epoch 01:  92%|█████████▏| 4439/4850 [21:18<01:57,  3.51it/s]

Epoch: 2, Loss: 2.7685465812683105


Processing epoch 01:  92%|█████████▏| 4440/4850 [21:18<01:56,  3.51it/s]

Epoch: 2, Loss: 1.980083703994751


Processing epoch 01:  92%|█████████▏| 4441/4850 [21:18<01:56,  3.50it/s]

Epoch: 2, Loss: 2.3547258377075195


Processing epoch 01:  92%|█████████▏| 4442/4850 [21:18<01:56,  3.52it/s]

Epoch: 2, Loss: 2.8042893409729004


Processing epoch 01:  92%|█████████▏| 4443/4850 [21:19<01:55,  3.52it/s]

Epoch: 2, Loss: 2.529512643814087


Processing epoch 01:  92%|█████████▏| 4444/4850 [21:19<01:56,  3.48it/s]

Epoch: 2, Loss: 2.908019542694092


Processing epoch 01:  92%|█████████▏| 4445/4850 [21:19<01:55,  3.49it/s]

Epoch: 2, Loss: 2.283052921295166


Processing epoch 01:  92%|█████████▏| 4446/4850 [21:20<01:57,  3.44it/s]

Epoch: 2, Loss: 2.6572742462158203


Processing epoch 01:  92%|█████████▏| 4447/4850 [21:20<01:59,  3.38it/s]

Epoch: 2, Loss: 2.175974130630493


Processing epoch 01:  92%|█████████▏| 4448/4850 [21:20<01:59,  3.37it/s]

Epoch: 2, Loss: 2.6036953926086426


Processing epoch 01:  92%|█████████▏| 4449/4850 [21:20<01:58,  3.40it/s]

Epoch: 2, Loss: 2.836488723754883


Processing epoch 01:  92%|█████████▏| 4450/4850 [21:21<01:57,  3.39it/s]

Epoch: 2, Loss: 2.733236312866211


Processing epoch 01:  92%|█████████▏| 4451/4850 [21:21<01:58,  3.38it/s]

Epoch: 2, Loss: 2.506399393081665


Processing epoch 01:  92%|█████████▏| 4452/4850 [21:21<01:58,  3.37it/s]

Epoch: 2, Loss: 2.3608579635620117


Processing epoch 01:  92%|█████████▏| 4453/4850 [21:22<01:58,  3.35it/s]

Epoch: 2, Loss: 2.628509759902954


Processing epoch 01:  92%|█████████▏| 4454/4850 [21:22<01:58,  3.34it/s]

Epoch: 2, Loss: 2.359665632247925


Processing epoch 01:  92%|█████████▏| 4455/4850 [21:22<01:57,  3.37it/s]

Epoch: 2, Loss: 2.4175920486450195


Processing epoch 01:  92%|█████████▏| 4456/4850 [21:23<01:57,  3.36it/s]

Epoch: 2, Loss: 2.4988808631896973


Processing epoch 01:  92%|█████████▏| 4457/4850 [21:23<01:58,  3.32it/s]

Epoch: 2, Loss: 2.449232578277588


Processing epoch 01:  92%|█████████▏| 4458/4850 [21:23<01:58,  3.32it/s]

Epoch: 2, Loss: 2.521970272064209


Processing epoch 01:  92%|█████████▏| 4459/4850 [21:23<01:58,  3.31it/s]

Epoch: 2, Loss: 2.217829942703247


Processing epoch 01:  92%|█████████▏| 4460/4850 [21:24<01:57,  3.31it/s]

Epoch: 2, Loss: 2.2048354148864746


Processing epoch 01:  92%|█████████▏| 4461/4850 [21:24<01:57,  3.31it/s]

Epoch: 2, Loss: 2.6284592151641846


Processing epoch 01:  92%|█████████▏| 4462/4850 [21:24<01:57,  3.30it/s]

Epoch: 2, Loss: 2.864366054534912


Processing epoch 01:  92%|█████████▏| 4463/4850 [21:25<01:57,  3.30it/s]

Epoch: 2, Loss: 2.355771064758301


Processing epoch 01:  92%|█████████▏| 4464/4850 [21:25<01:57,  3.28it/s]

Epoch: 2, Loss: 2.6844000816345215


Processing epoch 01:  92%|█████████▏| 4465/4850 [21:25<01:55,  3.32it/s]

Epoch: 2, Loss: 1.9736714363098145


Processing epoch 01:  92%|█████████▏| 4466/4850 [21:26<01:55,  3.33it/s]

Epoch: 2, Loss: 2.277559995651245


Processing epoch 01:  92%|█████████▏| 4467/4850 [21:26<01:55,  3.30it/s]

Epoch: 2, Loss: 2.2107126712799072


Processing epoch 01:  92%|█████████▏| 4468/4850 [21:26<01:55,  3.30it/s]

Epoch: 2, Loss: 2.582359790802002


Processing epoch 01:  92%|█████████▏| 4469/4850 [21:26<01:53,  3.34it/s]

Epoch: 2, Loss: 2.7714743614196777


Processing epoch 01:  92%|█████████▏| 4470/4850 [21:27<01:51,  3.39it/s]

Epoch: 2, Loss: 3.234372854232788


Processing epoch 01:  92%|█████████▏| 4471/4850 [21:27<01:50,  3.42it/s]

Epoch: 2, Loss: 2.4408011436462402


Processing epoch 01:  92%|█████████▏| 4472/4850 [21:27<01:49,  3.45it/s]

Epoch: 2, Loss: 2.6307859420776367


Processing epoch 01:  92%|█████████▏| 4473/4850 [21:28<01:48,  3.48it/s]

Epoch: 2, Loss: 2.747282028198242


Processing epoch 01:  92%|█████████▏| 4474/4850 [21:28<01:47,  3.49it/s]

Epoch: 2, Loss: 2.297858238220215


Processing epoch 01:  92%|█████████▏| 4475/4850 [21:28<01:47,  3.50it/s]

Epoch: 2, Loss: 2.538541793823242


Processing epoch 01:  92%|█████████▏| 4476/4850 [21:28<01:46,  3.52it/s]

Epoch: 2, Loss: 2.1469709873199463


Processing epoch 01:  92%|█████████▏| 4477/4850 [21:29<01:45,  3.54it/s]

Epoch: 2, Loss: 2.85455060005188


Processing epoch 01:  92%|█████████▏| 4478/4850 [21:29<01:45,  3.52it/s]

Epoch: 2, Loss: 2.5398876667022705


Processing epoch 01:  92%|█████████▏| 4479/4850 [21:29<01:45,  3.51it/s]

Epoch: 2, Loss: 2.4076099395751953


Processing epoch 01:  92%|█████████▏| 4480/4850 [21:30<01:45,  3.49it/s]

Epoch: 2, Loss: 2.5021843910217285


Processing epoch 01:  92%|█████████▏| 4481/4850 [21:30<01:45,  3.49it/s]

Epoch: 2, Loss: 2.530555009841919


Processing epoch 01:  92%|█████████▏| 4482/4850 [21:30<01:45,  3.48it/s]

Epoch: 2, Loss: 2.3048317432403564


Processing epoch 01:  92%|█████████▏| 4483/4850 [21:30<01:45,  3.48it/s]

Epoch: 2, Loss: 2.863074541091919


Processing epoch 01:  92%|█████████▏| 4484/4850 [21:31<01:45,  3.48it/s]

Epoch: 2, Loss: 2.4722275733947754


Processing epoch 01:  92%|█████████▏| 4485/4850 [21:31<01:44,  3.49it/s]

Epoch: 2, Loss: 2.281142234802246


Processing epoch 01:  92%|█████████▏| 4486/4850 [21:31<01:44,  3.48it/s]

Epoch: 2, Loss: 2.587541103363037


Processing epoch 01:  93%|█████████▎| 4487/4850 [21:32<01:44,  3.49it/s]

Epoch: 2, Loss: 2.153470039367676


Processing epoch 01:  93%|█████████▎| 4488/4850 [21:32<01:42,  3.52it/s]

Epoch: 2, Loss: 2.8580100536346436


Processing epoch 01:  93%|█████████▎| 4489/4850 [21:32<01:42,  3.52it/s]

Epoch: 2, Loss: 2.4541172981262207


Processing epoch 01:  93%|█████████▎| 4490/4850 [21:32<01:42,  3.53it/s]

Epoch: 2, Loss: 2.5714492797851562


Processing epoch 01:  93%|█████████▎| 4491/4850 [21:33<01:43,  3.48it/s]

Epoch: 2, Loss: 2.2796006202697754


Processing epoch 01:  93%|█████████▎| 4492/4850 [21:33<01:42,  3.49it/s]

Epoch: 2, Loss: 2.6453957557678223


Processing epoch 01:  93%|█████████▎| 4493/4850 [21:33<01:41,  3.51it/s]

Epoch: 2, Loss: 2.1962828636169434


Processing epoch 01:  93%|█████████▎| 4494/4850 [21:34<01:41,  3.50it/s]

Epoch: 2, Loss: 2.4430360794067383


Processing epoch 01:  93%|█████████▎| 4495/4850 [21:34<01:40,  3.51it/s]

Epoch: 2, Loss: 2.354909896850586


Processing epoch 01:  93%|█████████▎| 4496/4850 [21:34<01:40,  3.51it/s]

Epoch: 2, Loss: 2.4144415855407715


Processing epoch 01:  93%|█████████▎| 4497/4850 [21:34<01:40,  3.52it/s]

Epoch: 2, Loss: 2.027269124984741


Processing epoch 01:  93%|█████████▎| 4498/4850 [21:35<01:39,  3.53it/s]

Epoch: 2, Loss: 2.878800392150879


Processing epoch 01:  93%|█████████▎| 4499/4850 [21:35<01:39,  3.53it/s]

Epoch: 2, Loss: 1.8064460754394531


Processing epoch 01:  93%|█████████▎| 4500/4850 [21:35<01:39,  3.53it/s]

Epoch: 2, Loss: 2.534669876098633


Processing epoch 01:  93%|█████████▎| 4501/4850 [21:36<01:38,  3.53it/s]

Epoch: 2, Loss: 2.594493865966797


Processing epoch 01:  93%|█████████▎| 4502/4850 [21:36<01:38,  3.53it/s]

Epoch: 2, Loss: 2.3215503692626953


Processing epoch 01:  93%|█████████▎| 4503/4850 [21:36<01:39,  3.50it/s]

Epoch: 2, Loss: 2.2603912353515625


Processing epoch 01:  93%|█████████▎| 4504/4850 [21:36<01:38,  3.53it/s]

Epoch: 2, Loss: 2.6641685962677


Processing epoch 01:  93%|█████████▎| 4505/4850 [21:37<01:37,  3.54it/s]

Epoch: 2, Loss: 2.43410062789917


Processing epoch 01:  93%|█████████▎| 4506/4850 [21:37<01:38,  3.48it/s]

Epoch: 2, Loss: 2.79103946685791


Processing epoch 01:  93%|█████████▎| 4507/4850 [21:37<01:40,  3.41it/s]

Epoch: 2, Loss: 2.2426681518554688


Processing epoch 01:  93%|█████████▎| 4508/4850 [21:38<01:40,  3.41it/s]

Epoch: 2, Loss: 2.4307007789611816


Processing epoch 01:  93%|█████████▎| 4509/4850 [21:38<01:38,  3.45it/s]

Epoch: 2, Loss: 2.583472490310669


Processing epoch 01:  93%|█████████▎| 4510/4850 [21:38<01:39,  3.40it/s]

Epoch: 2, Loss: 2.1804232597351074


Processing epoch 01:  93%|█████████▎| 4511/4850 [21:39<01:40,  3.39it/s]

Epoch: 2, Loss: 2.2761545181274414


Processing epoch 01:  93%|█████████▎| 4512/4850 [21:39<01:40,  3.35it/s]

Epoch: 2, Loss: 2.41312575340271


Processing epoch 01:  93%|█████████▎| 4513/4850 [21:39<01:41,  3.31it/s]

Epoch: 2, Loss: 1.7415881156921387


Processing epoch 01:  93%|█████████▎| 4514/4850 [21:39<01:41,  3.31it/s]

Epoch: 2, Loss: 2.282402992248535


Processing epoch 01:  93%|█████████▎| 4515/4850 [21:40<01:40,  3.32it/s]

Epoch: 2, Loss: 2.3599328994750977


Processing epoch 01:  93%|█████████▎| 4516/4850 [21:40<01:38,  3.38it/s]

Epoch: 2, Loss: 2.424834966659546


Processing epoch 01:  93%|█████████▎| 4517/4850 [21:40<01:39,  3.33it/s]

Epoch: 2, Loss: 1.9138665199279785


Processing epoch 01:  93%|█████████▎| 4518/4850 [21:41<01:39,  3.34it/s]

Epoch: 2, Loss: 3.55122709274292


Processing epoch 01:  93%|█████████▎| 4519/4850 [21:41<01:38,  3.36it/s]

Epoch: 2, Loss: 1.603257179260254


Processing epoch 01:  93%|█████████▎| 4520/4850 [21:41<01:37,  3.39it/s]

Epoch: 2, Loss: 2.2486703395843506


Processing epoch 01:  93%|█████████▎| 4521/4850 [21:41<01:35,  3.45it/s]

Epoch: 2, Loss: 3.4727461338043213


Processing epoch 01:  93%|█████████▎| 4522/4850 [21:42<01:34,  3.47it/s]

Epoch: 2, Loss: 2.3855879306793213


Processing epoch 01:  93%|█████████▎| 4523/4850 [21:42<01:33,  3.50it/s]

Epoch: 2, Loss: 2.2718124389648438


Processing epoch 01:  93%|█████████▎| 4524/4850 [21:42<01:33,  3.47it/s]

Epoch: 2, Loss: 2.346792697906494


Processing epoch 01:  93%|█████████▎| 4525/4850 [21:43<01:34,  3.46it/s]

Epoch: 2, Loss: 2.5461654663085938


Processing epoch 01:  93%|█████████▎| 4526/4850 [21:43<01:33,  3.48it/s]

Epoch: 2, Loss: 2.352750301361084


Processing epoch 01:  93%|█████████▎| 4527/4850 [21:43<01:32,  3.51it/s]

Epoch: 2, Loss: 2.4033455848693848


Processing epoch 01:  93%|█████████▎| 4528/4850 [21:43<01:31,  3.51it/s]

Epoch: 2, Loss: 2.5926342010498047


Processing epoch 01:  93%|█████████▎| 4529/4850 [21:44<01:31,  3.51it/s]

Epoch: 2, Loss: 3.3963310718536377


Processing epoch 01:  93%|█████████▎| 4530/4850 [21:44<01:31,  3.49it/s]

Epoch: 2, Loss: 2.542762517929077


Processing epoch 01:  93%|█████████▎| 4531/4850 [21:44<01:32,  3.46it/s]

Epoch: 2, Loss: 2.12479829788208


Processing epoch 01:  93%|█████████▎| 4532/4850 [21:45<01:31,  3.48it/s]

Epoch: 2, Loss: 2.545858144760132


Processing epoch 01:  93%|█████████▎| 4533/4850 [21:45<01:30,  3.49it/s]

Epoch: 2, Loss: 2.4135193824768066


Processing epoch 01:  93%|█████████▎| 4534/4850 [21:45<01:30,  3.49it/s]

Epoch: 2, Loss: 1.822515845298767


Processing epoch 01:  94%|█████████▎| 4535/4850 [21:45<01:29,  3.51it/s]

Epoch: 2, Loss: 2.745889663696289


Processing epoch 01:  94%|█████████▎| 4536/4850 [21:46<01:29,  3.51it/s]

Epoch: 2, Loss: 2.182020664215088


Processing epoch 01:  94%|█████████▎| 4537/4850 [21:46<01:29,  3.52it/s]

Epoch: 2, Loss: 2.305797576904297


Processing epoch 01:  94%|█████████▎| 4538/4850 [21:46<01:29,  3.50it/s]

Epoch: 2, Loss: 1.9009822607040405


Processing epoch 01:  94%|█████████▎| 4539/4850 [21:47<01:29,  3.47it/s]

Epoch: 2, Loss: 2.362029552459717


Processing epoch 01:  94%|█████████▎| 4540/4850 [21:47<01:28,  3.49it/s]

Epoch: 2, Loss: 2.7684671878814697


Processing epoch 01:  94%|█████████▎| 4541/4850 [21:47<01:28,  3.48it/s]

Epoch: 2, Loss: 2.437133550643921


Processing epoch 01:  94%|█████████▎| 4542/4850 [21:48<01:29,  3.44it/s]

Epoch: 2, Loss: 2.725543737411499


Processing epoch 01:  94%|█████████▎| 4543/4850 [21:48<01:29,  3.44it/s]

Epoch: 2, Loss: 2.398581027984619


Processing epoch 01:  94%|█████████▎| 4544/4850 [21:48<01:28,  3.48it/s]

Epoch: 2, Loss: 2.6774652004241943


Processing epoch 01:  94%|█████████▎| 4545/4850 [21:48<01:27,  3.48it/s]

Epoch: 2, Loss: 2.3995187282562256


Processing epoch 01:  94%|█████████▎| 4546/4850 [21:49<01:26,  3.50it/s]

Epoch: 2, Loss: 2.5321502685546875


Processing epoch 01:  94%|█████████▍| 4547/4850 [21:49<01:26,  3.51it/s]

Epoch: 2, Loss: 2.443791389465332


Processing epoch 01:  94%|█████████▍| 4548/4850 [21:49<01:26,  3.51it/s]

Epoch: 2, Loss: 2.1310882568359375


Processing epoch 01:  94%|█████████▍| 4549/4850 [21:50<01:25,  3.52it/s]

Epoch: 2, Loss: 2.3308491706848145


Processing epoch 01:  94%|█████████▍| 4550/4850 [21:50<01:25,  3.51it/s]

Epoch: 2, Loss: 2.923771381378174


Processing epoch 01:  94%|█████████▍| 4551/4850 [21:50<01:24,  3.52it/s]

Epoch: 2, Loss: 1.9779175519943237


Processing epoch 01:  94%|█████████▍| 4552/4850 [21:50<01:24,  3.51it/s]

Epoch: 2, Loss: 2.613518476486206


Processing epoch 01:  94%|█████████▍| 4553/4850 [21:51<01:25,  3.49it/s]

Epoch: 2, Loss: 2.8743419647216797


Processing epoch 01:  94%|█████████▍| 4554/4850 [21:51<01:25,  3.45it/s]

Epoch: 2, Loss: 2.106947183609009


Processing epoch 01:  94%|█████████▍| 4555/4850 [21:51<01:25,  3.46it/s]

Epoch: 2, Loss: 2.657884359359741


Processing epoch 01:  94%|█████████▍| 4556/4850 [21:52<01:25,  3.42it/s]

Epoch: 2, Loss: 2.23738431930542


Processing epoch 01:  94%|█████████▍| 4557/4850 [21:52<01:26,  3.37it/s]

Epoch: 2, Loss: 2.344918727874756


Processing epoch 01:  94%|█████████▍| 4558/4850 [21:52<01:27,  3.36it/s]

Epoch: 2, Loss: 2.5593957901000977


Processing epoch 01:  94%|█████████▍| 4559/4850 [21:52<01:25,  3.41it/s]

Epoch: 2, Loss: 2.510512590408325


Processing epoch 01:  94%|█████████▍| 4560/4850 [21:53<01:24,  3.45it/s]

Epoch: 2, Loss: 2.7542271614074707


Processing epoch 01:  94%|█████████▍| 4561/4850 [21:53<01:24,  3.42it/s]

Epoch: 2, Loss: 2.4098072052001953


Processing epoch 01:  94%|█████████▍| 4562/4850 [21:53<01:24,  3.40it/s]

Epoch: 2, Loss: 2.2110495567321777


Processing epoch 01:  94%|█████████▍| 4563/4850 [21:54<01:24,  3.38it/s]

Epoch: 2, Loss: 2.031402349472046


Processing epoch 01:  94%|█████████▍| 4564/4850 [21:54<01:24,  3.37it/s]

Epoch: 2, Loss: 2.399071216583252


Processing epoch 01:  94%|█████████▍| 4565/4850 [21:54<01:24,  3.37it/s]

Epoch: 2, Loss: 2.3848018646240234


Processing epoch 01:  94%|█████████▍| 4566/4850 [21:54<01:24,  3.36it/s]

Epoch: 2, Loss: 1.9485430717468262


Processing epoch 01:  94%|█████████▍| 4567/4850 [21:55<01:23,  3.41it/s]

Epoch: 2, Loss: 2.410954475402832


Processing epoch 01:  94%|█████████▍| 4568/4850 [21:55<01:21,  3.45it/s]

Epoch: 2, Loss: 2.5305981636047363


Processing epoch 01:  94%|█████████▍| 4569/4850 [21:55<01:22,  3.42it/s]

Epoch: 2, Loss: 3.6645965576171875


Processing epoch 01:  94%|█████████▍| 4570/4850 [21:56<01:22,  3.39it/s]

Epoch: 2, Loss: 2.583073854446411


Processing epoch 01:  94%|█████████▍| 4571/4850 [21:56<01:21,  3.41it/s]

Epoch: 2, Loss: 2.03726863861084


Processing epoch 01:  94%|█████████▍| 4572/4850 [21:56<01:20,  3.43it/s]

Epoch: 2, Loss: 2.4233741760253906


Processing epoch 01:  94%|█████████▍| 4573/4850 [21:57<01:19,  3.46it/s]

Epoch: 2, Loss: 2.5615124702453613


Processing epoch 01:  94%|█████████▍| 4574/4850 [21:57<01:19,  3.48it/s]

Epoch: 2, Loss: 2.3363735675811768


Processing epoch 01:  94%|█████████▍| 4575/4850 [21:57<01:18,  3.50it/s]

Epoch: 2, Loss: 2.3534600734710693


Processing epoch 01:  94%|█████████▍| 4576/4850 [21:57<01:17,  3.51it/s]

Epoch: 2, Loss: 2.8347041606903076


Processing epoch 01:  94%|█████████▍| 4577/4850 [21:58<01:17,  3.51it/s]

Epoch: 2, Loss: 2.4160704612731934


Processing epoch 01:  94%|█████████▍| 4578/4850 [21:58<01:17,  3.52it/s]

Epoch: 2, Loss: 2.6197562217712402


Processing epoch 01:  94%|█████████▍| 4579/4850 [21:58<01:16,  3.54it/s]

Epoch: 2, Loss: 2.5402517318725586


Processing epoch 01:  94%|█████████▍| 4580/4850 [21:58<01:16,  3.53it/s]

Epoch: 2, Loss: 2.3272595405578613


Processing epoch 01:  94%|█████████▍| 4581/4850 [21:59<01:16,  3.52it/s]

Epoch: 2, Loss: 2.169452428817749


Processing epoch 01:  94%|█████████▍| 4582/4850 [21:59<01:15,  3.53it/s]

Epoch: 2, Loss: 2.8007330894470215


Processing epoch 01:  94%|█████████▍| 4583/4850 [21:59<01:15,  3.53it/s]

Epoch: 2, Loss: 2.7424111366271973


Processing epoch 01:  95%|█████████▍| 4584/4850 [22:00<01:15,  3.54it/s]

Epoch: 2, Loss: 2.113074779510498


Processing epoch 01:  95%|█████████▍| 4585/4850 [22:00<01:15,  3.53it/s]

Epoch: 2, Loss: 2.0842137336730957


Processing epoch 01:  95%|█████████▍| 4586/4850 [22:00<01:15,  3.52it/s]

Epoch: 2, Loss: 2.1283273696899414


Processing epoch 01:  95%|█████████▍| 4587/4850 [22:00<01:14,  3.52it/s]

Epoch: 2, Loss: 2.3448328971862793


Processing epoch 01:  95%|█████████▍| 4588/4850 [22:01<01:14,  3.49it/s]

Epoch: 2, Loss: 2.118098258972168


Processing epoch 01:  95%|█████████▍| 4589/4850 [22:01<01:14,  3.51it/s]

Epoch: 2, Loss: 2.452723503112793


Processing epoch 01:  95%|█████████▍| 4590/4850 [22:01<01:13,  3.53it/s]

Epoch: 2, Loss: 2.388962745666504


Processing epoch 01:  95%|█████████▍| 4591/4850 [22:02<01:13,  3.51it/s]

Epoch: 2, Loss: 1.912466287612915


Processing epoch 01:  95%|█████████▍| 4592/4850 [22:02<01:13,  3.50it/s]

Epoch: 2, Loss: 2.010920524597168


Processing epoch 01:  95%|█████████▍| 4593/4850 [22:02<01:14,  3.46it/s]

Epoch: 2, Loss: 2.3352808952331543


Processing epoch 01:  95%|█████████▍| 4594/4850 [22:02<01:13,  3.49it/s]

Epoch: 2, Loss: 2.605414867401123


Processing epoch 01:  95%|█████████▍| 4595/4850 [22:03<01:13,  3.48it/s]

Epoch: 2, Loss: 2.228374719619751


Processing epoch 01:  95%|█████████▍| 4596/4850 [22:03<01:13,  3.47it/s]

Epoch: 2, Loss: 2.6705446243286133


Processing epoch 01:  95%|█████████▍| 4597/4850 [22:03<01:13,  3.42it/s]

Epoch: 2, Loss: 2.212704658508301


Processing epoch 01:  95%|█████████▍| 4598/4850 [22:04<01:13,  3.45it/s]

Epoch: 2, Loss: 2.370084524154663


Processing epoch 01:  95%|█████████▍| 4599/4850 [22:04<01:12,  3.47it/s]

Epoch: 2, Loss: 2.108738899230957


Processing epoch 01:  95%|█████████▍| 4600/4850 [22:04<01:11,  3.48it/s]

Epoch: 2, Loss: 2.9916443824768066


Processing epoch 01:  95%|█████████▍| 4601/4850 [22:05<01:11,  3.50it/s]

Epoch: 2, Loss: 2.345733642578125


Processing epoch 01:  95%|█████████▍| 4602/4850 [22:05<01:10,  3.52it/s]

Epoch: 2, Loss: 3.0203800201416016


Processing epoch 01:  95%|█████████▍| 4603/4850 [22:05<01:10,  3.51it/s]

Epoch: 2, Loss: 2.446187973022461


Processing epoch 01:  95%|█████████▍| 4604/4850 [22:05<01:10,  3.47it/s]

Epoch: 2, Loss: 2.200108051300049


Processing epoch 01:  95%|█████████▍| 4605/4850 [22:06<01:10,  3.49it/s]

Epoch: 2, Loss: 2.3803186416625977


Processing epoch 01:  95%|█████████▍| 4606/4850 [22:06<01:10,  3.45it/s]

Epoch: 2, Loss: 3.2924389839172363


Processing epoch 01:  95%|█████████▍| 4607/4850 [22:06<01:11,  3.41it/s]

Epoch: 2, Loss: 2.519517421722412


Processing epoch 01:  95%|█████████▌| 4608/4850 [22:07<01:10,  3.44it/s]

Epoch: 2, Loss: 2.611722946166992


Processing epoch 01:  95%|█████████▌| 4609/4850 [22:07<01:09,  3.47it/s]

Epoch: 2, Loss: 2.3731729984283447


Processing epoch 01:  95%|█████████▌| 4610/4850 [22:07<01:08,  3.50it/s]

Epoch: 2, Loss: 2.746542453765869


Processing epoch 01:  95%|█████████▌| 4611/4850 [22:07<01:09,  3.43it/s]

Epoch: 2, Loss: 2.747863292694092


Processing epoch 01:  95%|█████████▌| 4612/4850 [22:08<01:09,  3.42it/s]

Epoch: 2, Loss: 2.722109794616699


Processing epoch 01:  95%|█████████▌| 4613/4850 [22:08<01:10,  3.36it/s]

Epoch: 2, Loss: 2.507589340209961


Processing epoch 01:  95%|█████████▌| 4614/4850 [22:08<01:09,  3.37it/s]

Epoch: 2, Loss: 3.156294345855713


Processing epoch 01:  95%|█████████▌| 4615/4850 [22:09<01:09,  3.37it/s]

Epoch: 2, Loss: 1.9417357444763184


Processing epoch 01:  95%|█████████▌| 4616/4850 [22:09<01:10,  3.34it/s]

Epoch: 2, Loss: 2.1938111782073975


Processing epoch 01:  95%|█████████▌| 4617/4850 [22:09<01:09,  3.33it/s]

Epoch: 2, Loss: 2.868283987045288


Processing epoch 01:  95%|█████████▌| 4618/4850 [22:10<01:09,  3.35it/s]

Epoch: 2, Loss: 2.737549304962158


Processing epoch 01:  95%|█████████▌| 4619/4850 [22:10<01:08,  3.35it/s]

Epoch: 2, Loss: 2.5693411827087402


Processing epoch 01:  95%|█████████▌| 4620/4850 [22:10<01:08,  3.35it/s]

Epoch: 2, Loss: 2.038166046142578


Processing epoch 01:  95%|█████████▌| 4621/4850 [22:10<01:07,  3.39it/s]

Epoch: 2, Loss: 2.286048650741577


Processing epoch 01:  95%|█████████▌| 4622/4850 [22:11<01:06,  3.42it/s]

Epoch: 2, Loss: 2.7910842895507812


Processing epoch 01:  95%|█████████▌| 4623/4850 [22:11<01:05,  3.45it/s]

Epoch: 2, Loss: 2.871426820755005


Processing epoch 01:  95%|█████████▌| 4624/4850 [22:11<01:05,  3.46it/s]

Epoch: 2, Loss: 2.077702522277832


Processing epoch 01:  95%|█████████▌| 4625/4850 [22:12<01:04,  3.49it/s]

Epoch: 2, Loss: 2.41520094871521


Processing epoch 01:  95%|█████████▌| 4626/4850 [22:12<01:04,  3.49it/s]

Epoch: 2, Loss: 2.378452777862549


Processing epoch 01:  95%|█████████▌| 4627/4850 [22:12<01:03,  3.49it/s]

Epoch: 2, Loss: 2.3202340602874756


Processing epoch 01:  95%|█████████▌| 4628/4850 [22:12<01:03,  3.50it/s]

Epoch: 2, Loss: 2.8147292137145996


Processing epoch 01:  95%|█████████▌| 4629/4850 [22:13<01:03,  3.51it/s]

Epoch: 2, Loss: 2.1115503311157227


Processing epoch 01:  95%|█████████▌| 4630/4850 [22:13<01:02,  3.51it/s]

Epoch: 2, Loss: 2.545279026031494


Processing epoch 01:  95%|█████████▌| 4631/4850 [22:13<01:02,  3.50it/s]

Epoch: 2, Loss: 2.21264910697937


Processing epoch 01:  96%|█████████▌| 4632/4850 [22:14<01:02,  3.50it/s]

Epoch: 2, Loss: 1.971883773803711


Processing epoch 01:  96%|█████████▌| 4633/4850 [22:14<01:03,  3.44it/s]

Epoch: 2, Loss: 2.3077378273010254


Processing epoch 01:  96%|█████████▌| 4634/4850 [22:14<01:02,  3.46it/s]

Epoch: 2, Loss: 2.1425790786743164


Processing epoch 01:  96%|█████████▌| 4635/4850 [22:14<01:02,  3.45it/s]

Epoch: 2, Loss: 2.6614890098571777


Processing epoch 01:  96%|█████████▌| 4636/4850 [22:15<01:01,  3.49it/s]

Epoch: 2, Loss: 2.137938976287842


Processing epoch 01:  96%|█████████▌| 4637/4850 [22:15<01:00,  3.50it/s]

Epoch: 2, Loss: 2.7196617126464844


Processing epoch 01:  96%|█████████▌| 4638/4850 [22:15<01:00,  3.51it/s]

Epoch: 2, Loss: 2.1588072776794434


Processing epoch 01:  96%|█████████▌| 4639/4850 [22:16<01:00,  3.51it/s]

Epoch: 2, Loss: 2.4258174896240234


Processing epoch 01:  96%|█████████▌| 4640/4850 [22:16<01:00,  3.50it/s]

Epoch: 2, Loss: 2.8132805824279785


Processing epoch 01:  96%|█████████▌| 4641/4850 [22:16<00:59,  3.49it/s]

Epoch: 2, Loss: 2.466606855392456


Processing epoch 01:  96%|█████████▌| 4642/4850 [22:16<00:59,  3.48it/s]

Epoch: 2, Loss: 2.4420337677001953


Processing epoch 01:  96%|█████████▌| 4643/4850 [22:17<00:59,  3.50it/s]

Epoch: 2, Loss: 2.7825491428375244


Processing epoch 01:  96%|█████████▌| 4644/4850 [22:17<00:59,  3.49it/s]

Epoch: 2, Loss: 2.8021018505096436


Processing epoch 01:  96%|█████████▌| 4645/4850 [22:17<00:58,  3.50it/s]

Epoch: 2, Loss: 2.2291672229766846


Processing epoch 01:  96%|█████████▌| 4646/4850 [22:18<00:58,  3.51it/s]

Epoch: 2, Loss: 1.7987982034683228


Processing epoch 01:  96%|█████████▌| 4647/4850 [22:18<00:57,  3.52it/s]

Epoch: 2, Loss: 2.4125633239746094


Processing epoch 01:  96%|█████████▌| 4648/4850 [22:18<00:57,  3.53it/s]

Epoch: 2, Loss: 2.1337122917175293


Processing epoch 01:  96%|█████████▌| 4649/4850 [22:18<00:56,  3.53it/s]

Epoch: 2, Loss: 2.7868807315826416


Processing epoch 01:  96%|█████████▌| 4650/4850 [22:19<00:56,  3.55it/s]

Epoch: 2, Loss: 3.8919081687927246


Processing epoch 01:  96%|█████████▌| 4651/4850 [22:19<00:56,  3.54it/s]

Epoch: 2, Loss: 2.4281020164489746


Processing epoch 01:  96%|█████████▌| 4652/4850 [22:19<00:56,  3.53it/s]

Epoch: 2, Loss: 2.7808027267456055


Processing epoch 01:  96%|█████████▌| 4653/4850 [22:20<00:55,  3.54it/s]

Epoch: 2, Loss: 2.4921205043792725


Processing epoch 01:  96%|█████████▌| 4654/4850 [22:20<00:55,  3.54it/s]

Epoch: 2, Loss: 2.7325103282928467


Processing epoch 01:  96%|█████████▌| 4655/4850 [22:20<00:55,  3.50it/s]

Epoch: 2, Loss: 2.079124689102173


Processing epoch 01:  96%|█████████▌| 4656/4850 [22:20<00:56,  3.46it/s]

Epoch: 2, Loss: 3.409111738204956


Processing epoch 01:  96%|█████████▌| 4657/4850 [22:21<00:56,  3.39it/s]

Epoch: 2, Loss: 2.0589404106140137


Processing epoch 01:  96%|█████████▌| 4658/4850 [22:21<00:56,  3.39it/s]

Epoch: 2, Loss: 2.5007715225219727


Processing epoch 01:  96%|█████████▌| 4659/4850 [22:21<00:56,  3.41it/s]

Epoch: 2, Loss: 1.8419904708862305


Processing epoch 01:  96%|█████████▌| 4660/4850 [22:22<00:55,  3.44it/s]

Epoch: 2, Loss: 2.380279064178467


Processing epoch 01:  96%|█████████▌| 4661/4850 [22:22<00:54,  3.48it/s]

Epoch: 2, Loss: 2.6550345420837402


Processing epoch 01:  96%|█████████▌| 4662/4850 [22:22<00:53,  3.49it/s]

Epoch: 2, Loss: 2.1873679161071777


Processing epoch 01:  96%|█████████▌| 4663/4850 [22:22<00:53,  3.48it/s]

Epoch: 2, Loss: 2.456632614135742


Processing epoch 01:  96%|█████████▌| 4664/4850 [22:23<00:54,  3.44it/s]

Epoch: 2, Loss: 2.992347478866577


Processing epoch 01:  96%|█████████▌| 4665/4850 [22:23<00:54,  3.39it/s]

Epoch: 2, Loss: 2.543876886367798


Processing epoch 01:  96%|█████████▌| 4666/4850 [22:23<00:54,  3.38it/s]

Epoch: 2, Loss: 2.5854225158691406


Processing epoch 01:  96%|█████████▌| 4667/4850 [22:24<00:54,  3.34it/s]

Epoch: 2, Loss: 3.0550384521484375


Processing epoch 01:  96%|█████████▌| 4668/4850 [22:24<00:54,  3.33it/s]

Epoch: 2, Loss: 2.059035301208496


Processing epoch 01:  96%|█████████▋| 4669/4850 [22:24<00:54,  3.33it/s]

Epoch: 2, Loss: 2.2497496604919434


Processing epoch 01:  96%|█████████▋| 4670/4850 [22:25<00:53,  3.34it/s]

Epoch: 2, Loss: 2.220219850540161


Processing epoch 01:  96%|█████████▋| 4671/4850 [22:25<00:54,  3.29it/s]

Epoch: 2, Loss: 2.157679557800293


Processing epoch 01:  96%|█████████▋| 4672/4850 [22:25<00:52,  3.37it/s]

Epoch: 2, Loss: 2.8008322715759277


Processing epoch 01:  96%|█████████▋| 4673/4850 [22:25<00:52,  3.40it/s]

Epoch: 2, Loss: 2.9394383430480957


Processing epoch 01:  96%|█████████▋| 4674/4850 [22:26<00:51,  3.44it/s]

Epoch: 2, Loss: 2.3117706775665283


Processing epoch 01:  96%|█████████▋| 4675/4850 [22:26<00:50,  3.48it/s]

Epoch: 2, Loss: 2.505002498626709


Processing epoch 01:  96%|█████████▋| 4676/4850 [22:26<00:49,  3.50it/s]

Epoch: 2, Loss: 2.632382392883301


Processing epoch 01:  96%|█████████▋| 4677/4850 [22:27<00:49,  3.51it/s]

Epoch: 2, Loss: 2.8182625770568848


Processing epoch 01:  96%|█████████▋| 4678/4850 [22:27<00:48,  3.51it/s]

Epoch: 2, Loss: 2.607370376586914


Processing epoch 01:  96%|█████████▋| 4679/4850 [22:27<00:48,  3.49it/s]

Epoch: 2, Loss: 2.777658462524414


Processing epoch 01:  96%|█████████▋| 4680/4850 [22:27<00:48,  3.47it/s]

Epoch: 2, Loss: 2.373305320739746


Processing epoch 01:  97%|█████████▋| 4681/4850 [22:28<00:48,  3.48it/s]

Epoch: 2, Loss: 2.0562429428100586


Processing epoch 01:  97%|█████████▋| 4682/4850 [22:28<00:48,  3.49it/s]

Epoch: 2, Loss: 2.559251546859741


Processing epoch 01:  97%|█████████▋| 4683/4850 [22:28<00:47,  3.50it/s]

Epoch: 2, Loss: 2.656468391418457


Processing epoch 01:  97%|█████████▋| 4684/4850 [22:29<00:47,  3.52it/s]

Epoch: 2, Loss: 3.6258654594421387


Processing epoch 01:  97%|█████████▋| 4685/4850 [22:29<00:46,  3.51it/s]

Epoch: 2, Loss: 2.2751522064208984


Processing epoch 01:  97%|█████████▋| 4686/4850 [22:29<00:46,  3.52it/s]

Epoch: 2, Loss: 2.6448841094970703


Processing epoch 01:  97%|█████████▋| 4687/4850 [22:29<00:46,  3.52it/s]

Epoch: 2, Loss: 2.3818657398223877


Processing epoch 01:  97%|█████████▋| 4688/4850 [22:30<00:45,  3.53it/s]

Epoch: 2, Loss: 2.2500171661376953


Processing epoch 01:  97%|█████████▋| 4689/4850 [22:30<00:45,  3.51it/s]

Epoch: 2, Loss: 2.8197402954101562


Processing epoch 01:  97%|█████████▋| 4690/4850 [22:30<00:45,  3.51it/s]

Epoch: 2, Loss: 2.6543984413146973


Processing epoch 01:  97%|█████████▋| 4691/4850 [22:31<00:45,  3.48it/s]

Epoch: 2, Loss: 2.334306240081787


Processing epoch 01:  97%|█████████▋| 4692/4850 [22:31<00:45,  3.48it/s]

Epoch: 2, Loss: 2.5443882942199707


Processing epoch 01:  97%|█████████▋| 4693/4850 [22:31<00:44,  3.51it/s]

Epoch: 2, Loss: 3.023496150970459


Processing epoch 01:  97%|█████████▋| 4694/4850 [22:31<00:44,  3.49it/s]

Epoch: 2, Loss: 2.305907726287842


Processing epoch 01:  97%|█████████▋| 4695/4850 [22:32<00:44,  3.48it/s]

Epoch: 2, Loss: 2.166916608810425


Processing epoch 01:  97%|█████████▋| 4696/4850 [22:32<00:43,  3.50it/s]

Epoch: 2, Loss: 2.576413869857788


Processing epoch 01:  97%|█████████▋| 4697/4850 [22:32<00:43,  3.51it/s]

Epoch: 2, Loss: 2.223205089569092


Processing epoch 01:  97%|█████████▋| 4698/4850 [22:33<00:43,  3.53it/s]

Epoch: 2, Loss: 2.1700873374938965


Processing epoch 01:  97%|█████████▋| 4699/4850 [22:33<00:42,  3.53it/s]

Epoch: 2, Loss: 3.0605146884918213


Processing epoch 01:  97%|█████████▋| 4700/4850 [22:33<00:42,  3.53it/s]

Epoch: 2, Loss: 2.5902695655822754


Processing epoch 01:  97%|█████████▋| 4701/4850 [22:33<00:42,  3.53it/s]

Epoch: 2, Loss: 2.7911341190338135


Processing epoch 01:  97%|█████████▋| 4702/4850 [22:34<00:42,  3.49it/s]

Epoch: 2, Loss: 2.410586357116699


Processing epoch 01:  97%|█████████▋| 4703/4850 [22:34<00:42,  3.50it/s]

Epoch: 2, Loss: 2.4213502407073975


Processing epoch 01:  97%|█████████▋| 4704/4850 [22:34<00:41,  3.49it/s]

Epoch: 2, Loss: 2.049933433532715


Processing epoch 01:  97%|█████████▋| 4705/4850 [22:35<00:41,  3.49it/s]

Epoch: 2, Loss: 2.349985361099243


Processing epoch 01:  97%|█████████▋| 4706/4850 [22:35<00:41,  3.48it/s]

Epoch: 2, Loss: 2.094557762145996


Processing epoch 01:  97%|█████████▋| 4707/4850 [22:35<00:41,  3.46it/s]

Epoch: 2, Loss: 2.4691038131713867


Processing epoch 01:  97%|█████████▋| 4708/4850 [22:35<00:41,  3.42it/s]

Epoch: 2, Loss: 2.4925544261932373


Processing epoch 01:  97%|█████████▋| 4709/4850 [22:36<00:41,  3.42it/s]

Epoch: 2, Loss: 2.4474129676818848


Processing epoch 01:  97%|█████████▋| 4710/4850 [22:36<00:40,  3.46it/s]

Epoch: 2, Loss: 2.664325714111328


Processing epoch 01:  97%|█████████▋| 4711/4850 [22:36<00:41,  3.37it/s]

Epoch: 2, Loss: 2.903867244720459


Processing epoch 01:  97%|█████████▋| 4712/4850 [22:37<00:40,  3.39it/s]

Epoch: 2, Loss: 2.3471055030822754


Processing epoch 01:  97%|█████████▋| 4713/4850 [22:37<00:40,  3.37it/s]

Epoch: 2, Loss: 3.029296636581421


Processing epoch 01:  97%|█████████▋| 4714/4850 [22:37<00:41,  3.31it/s]

Epoch: 2, Loss: 2.3828773498535156


Processing epoch 01:  97%|█████████▋| 4715/4850 [22:38<00:40,  3.31it/s]

Epoch: 2, Loss: 2.3904666900634766


Processing epoch 01:  97%|█████████▋| 4716/4850 [22:38<00:39,  3.37it/s]

Epoch: 2, Loss: 2.944284200668335


Processing epoch 01:  97%|█████████▋| 4717/4850 [22:38<00:39,  3.37it/s]

Epoch: 2, Loss: 2.528055191040039


Processing epoch 01:  97%|█████████▋| 4718/4850 [22:38<00:38,  3.39it/s]

Epoch: 2, Loss: 2.6803786754608154


Processing epoch 01:  97%|█████████▋| 4719/4850 [22:39<00:38,  3.43it/s]

Epoch: 2, Loss: 2.424311399459839


Processing epoch 01:  97%|█████████▋| 4720/4850 [22:39<00:37,  3.45it/s]

Epoch: 2, Loss: 2.863754987716675


Processing epoch 01:  97%|█████████▋| 4721/4850 [22:39<00:37,  3.43it/s]

Epoch: 2, Loss: 2.8331236839294434


Processing epoch 01:  97%|█████████▋| 4722/4850 [22:40<00:37,  3.37it/s]

Epoch: 2, Loss: 2.6156249046325684


Processing epoch 01:  97%|█████████▋| 4723/4850 [22:40<00:37,  3.42it/s]

Epoch: 2, Loss: 2.3015201091766357


Processing epoch 01:  97%|█████████▋| 4724/4850 [22:40<00:36,  3.46it/s]

Epoch: 2, Loss: 2.8991246223449707


Processing epoch 01:  97%|█████████▋| 4725/4850 [22:40<00:35,  3.48it/s]

Epoch: 2, Loss: 2.9079644680023193


Processing epoch 01:  97%|█████████▋| 4726/4850 [22:41<00:35,  3.48it/s]

Epoch: 2, Loss: 2.2616238594055176


Processing epoch 01:  97%|█████████▋| 4727/4850 [22:41<00:35,  3.49it/s]

Epoch: 2, Loss: 2.2241592407226562


Processing epoch 01:  97%|█████████▋| 4728/4850 [22:41<00:34,  3.52it/s]

Epoch: 2, Loss: 2.583756446838379


Processing epoch 01:  98%|█████████▊| 4729/4850 [22:42<00:34,  3.51it/s]

Epoch: 2, Loss: 2.244173049926758


Processing epoch 01:  98%|█████████▊| 4730/4850 [22:42<00:34,  3.49it/s]

Epoch: 2, Loss: 1.953505039215088


Processing epoch 01:  98%|█████████▊| 4731/4850 [22:42<00:34,  3.47it/s]

Epoch: 2, Loss: 2.0028042793273926


Processing epoch 01:  98%|█████████▊| 4732/4850 [22:42<00:33,  3.48it/s]

Epoch: 2, Loss: 2.4100306034088135


Processing epoch 01:  98%|█████████▊| 4733/4850 [22:43<00:33,  3.48it/s]

Epoch: 2, Loss: 2.2797374725341797


Processing epoch 01:  98%|█████████▊| 4734/4850 [22:43<00:33,  3.49it/s]

Epoch: 2, Loss: 2.4594955444335938


Processing epoch 01:  98%|█████████▊| 4735/4850 [22:43<00:32,  3.51it/s]

Epoch: 2, Loss: 3.3189945220947266


Processing epoch 01:  98%|█████████▊| 4736/4850 [22:44<00:32,  3.52it/s]

Epoch: 2, Loss: 2.3605427742004395


Processing epoch 01:  98%|█████████▊| 4737/4850 [22:44<00:32,  3.49it/s]

Epoch: 2, Loss: 2.473917007446289


Processing epoch 01:  98%|█████████▊| 4738/4850 [22:44<00:32,  3.43it/s]

Epoch: 2, Loss: 2.668837547302246


Processing epoch 01:  98%|█████████▊| 4739/4850 [22:44<00:31,  3.48it/s]

Epoch: 2, Loss: 2.826815605163574


Processing epoch 01:  98%|█████████▊| 4740/4850 [22:45<00:31,  3.49it/s]

Epoch: 2, Loss: 2.601715087890625


Processing epoch 01:  98%|█████████▊| 4741/4850 [22:45<00:31,  3.48it/s]

Epoch: 2, Loss: 2.2566144466400146


Processing epoch 01:  98%|█████████▊| 4742/4850 [22:45<00:30,  3.50it/s]

Epoch: 2, Loss: 2.280170202255249


Processing epoch 01:  98%|█████████▊| 4743/4850 [22:46<00:30,  3.52it/s]

Epoch: 2, Loss: 2.245870590209961


Processing epoch 01:  98%|█████████▊| 4744/4850 [22:46<00:30,  3.50it/s]

Epoch: 2, Loss: 2.285579204559326


Processing epoch 01:  98%|█████████▊| 4745/4850 [22:46<00:29,  3.51it/s]

Epoch: 2, Loss: 2.628695487976074


Processing epoch 01:  98%|█████████▊| 4746/4850 [22:46<00:29,  3.51it/s]

Epoch: 2, Loss: 2.356126308441162


Processing epoch 01:  98%|█████████▊| 4747/4850 [22:47<00:29,  3.48it/s]

Epoch: 2, Loss: 2.5910134315490723


Processing epoch 01:  98%|█████████▊| 4748/4850 [22:47<00:29,  3.47it/s]

Epoch: 2, Loss: 2.4289989471435547


Processing epoch 01:  98%|█████████▊| 4749/4850 [22:47<00:29,  3.43it/s]

Epoch: 2, Loss: 2.1551854610443115


Processing epoch 01:  98%|█████████▊| 4750/4850 [22:48<00:28,  3.46it/s]

Epoch: 2, Loss: 2.0450024604797363


Processing epoch 01:  98%|█████████▊| 4751/4850 [22:48<00:28,  3.46it/s]

Epoch: 2, Loss: 2.209317684173584


Processing epoch 01:  98%|█████████▊| 4752/4850 [22:48<00:28,  3.49it/s]

Epoch: 2, Loss: 2.7478690147399902


Processing epoch 01:  98%|█████████▊| 4753/4850 [22:48<00:27,  3.51it/s]

Epoch: 2, Loss: 2.8980765342712402


Processing epoch 01:  98%|█████████▊| 4754/4850 [22:49<00:27,  3.51it/s]

Epoch: 2, Loss: 3.1986711025238037


Processing epoch 01:  98%|█████████▊| 4755/4850 [22:49<00:26,  3.53it/s]

Epoch: 2, Loss: 2.7235288619995117


Processing epoch 01:  98%|█████████▊| 4756/4850 [22:49<00:26,  3.52it/s]

Epoch: 2, Loss: 2.320173978805542


Processing epoch 01:  98%|█████████▊| 4757/4850 [22:50<00:26,  3.51it/s]

Epoch: 2, Loss: 2.120774745941162


Processing epoch 01:  98%|█████████▊| 4758/4850 [22:50<00:26,  3.48it/s]

Epoch: 2, Loss: 2.7774741649627686


Processing epoch 01:  98%|█████████▊| 4759/4850 [22:50<00:26,  3.47it/s]

Epoch: 2, Loss: 2.2666382789611816


Processing epoch 01:  98%|█████████▊| 4760/4850 [22:50<00:26,  3.43it/s]

Epoch: 2, Loss: 2.1353278160095215


Processing epoch 01:  98%|█████████▊| 4761/4850 [22:51<00:25,  3.44it/s]

Epoch: 2, Loss: 2.4009594917297363


Processing epoch 01:  98%|█████████▊| 4762/4850 [22:51<00:25,  3.45it/s]

Epoch: 2, Loss: 2.200138568878174


Processing epoch 01:  98%|█████████▊| 4763/4850 [22:51<00:25,  3.47it/s]

Epoch: 2, Loss: 2.2766311168670654


Processing epoch 01:  98%|█████████▊| 4764/4850 [22:52<00:25,  3.43it/s]

Epoch: 2, Loss: 2.072049617767334


Processing epoch 01:  98%|█████████▊| 4765/4850 [22:52<00:24,  3.41it/s]

Epoch: 2, Loss: 2.0301332473754883


Processing epoch 01:  98%|█████████▊| 4766/4850 [22:52<00:24,  3.39it/s]

Epoch: 2, Loss: 2.185098171234131


Processing epoch 01:  98%|█████████▊| 4767/4850 [22:53<00:24,  3.34it/s]

Epoch: 2, Loss: 3.611264228820801


Processing epoch 01:  98%|█████████▊| 4768/4850 [22:53<00:24,  3.31it/s]

Epoch: 2, Loss: 2.171818971633911


Processing epoch 01:  98%|█████████▊| 4769/4850 [22:53<00:24,  3.29it/s]

Epoch: 2, Loss: 2.46254563331604


Processing epoch 01:  98%|█████████▊| 4770/4850 [22:53<00:24,  3.30it/s]

Epoch: 2, Loss: 2.4510765075683594


Processing epoch 01:  98%|█████████▊| 4771/4850 [22:54<00:23,  3.30it/s]

Epoch: 2, Loss: 2.016683340072632


Processing epoch 01:  98%|█████████▊| 4772/4850 [22:54<00:23,  3.30it/s]

Epoch: 2, Loss: 2.8695924282073975


Processing epoch 01:  98%|█████████▊| 4773/4850 [22:54<00:23,  3.29it/s]

Epoch: 2, Loss: 2.1065187454223633


Processing epoch 01:  98%|█████████▊| 4774/4850 [22:55<00:22,  3.30it/s]

Epoch: 2, Loss: 2.2909135818481445


Processing epoch 01:  98%|█████████▊| 4775/4850 [22:55<00:22,  3.34it/s]

Epoch: 2, Loss: 2.2676949501037598


Processing epoch 01:  98%|█████████▊| 4776/4850 [22:55<00:21,  3.39it/s]

Epoch: 2, Loss: 2.6411991119384766


Processing epoch 01:  98%|█████████▊| 4777/4850 [22:55<00:21,  3.44it/s]

Epoch: 2, Loss: 2.8261795043945312


Processing epoch 01:  99%|█████████▊| 4778/4850 [22:56<00:20,  3.45it/s]

Epoch: 2, Loss: 2.827704906463623


Processing epoch 01:  99%|█████████▊| 4779/4850 [22:56<00:20,  3.47it/s]

Epoch: 2, Loss: 2.389131546020508


Processing epoch 01:  99%|█████████▊| 4780/4850 [22:56<00:20,  3.48it/s]

Epoch: 2, Loss: 2.0064470767974854


Processing epoch 01:  99%|█████████▊| 4781/4850 [22:57<00:19,  3.48it/s]

Epoch: 2, Loss: 2.5762572288513184


Processing epoch 01:  99%|█████████▊| 4782/4850 [22:57<00:19,  3.49it/s]

Epoch: 2, Loss: 2.984060287475586


Processing epoch 01:  99%|█████████▊| 4783/4850 [22:57<00:19,  3.47it/s]

Epoch: 2, Loss: 2.493398427963257


Processing epoch 01:  99%|█████████▊| 4784/4850 [22:57<00:18,  3.48it/s]

Epoch: 2, Loss: 2.1277308464050293


Processing epoch 01:  99%|█████████▊| 4785/4850 [22:58<00:18,  3.42it/s]

Epoch: 2, Loss: 2.1977450847625732


Processing epoch 01:  99%|█████████▊| 4786/4850 [22:58<00:18,  3.47it/s]

Epoch: 2, Loss: 2.6783719062805176


Processing epoch 01:  99%|█████████▊| 4787/4850 [22:58<00:18,  3.48it/s]

Epoch: 2, Loss: 2.23366641998291


Processing epoch 01:  99%|█████████▊| 4788/4850 [22:59<00:17,  3.49it/s]

Epoch: 2, Loss: 2.2602453231811523


Processing epoch 01:  99%|█████████▊| 4789/4850 [22:59<00:17,  3.50it/s]

Epoch: 2, Loss: 2.547818183898926


Processing epoch 01:  99%|█████████▉| 4790/4850 [22:59<00:17,  3.48it/s]

Epoch: 2, Loss: 2.425617218017578


Processing epoch 01:  99%|█████████▉| 4791/4850 [23:00<00:16,  3.49it/s]

Epoch: 2, Loss: 2.0835824012756348


Processing epoch 01:  99%|█████████▉| 4792/4850 [23:00<00:16,  3.50it/s]

Epoch: 2, Loss: 2.5799994468688965


Processing epoch 01:  99%|█████████▉| 4793/4850 [23:00<00:16,  3.48it/s]

Epoch: 2, Loss: 2.33595609664917


Processing epoch 01:  99%|█████████▉| 4794/4850 [23:00<00:16,  3.50it/s]

Epoch: 2, Loss: 2.173719882965088


Processing epoch 01:  99%|█████████▉| 4795/4850 [23:01<00:15,  3.51it/s]

Epoch: 2, Loss: 2.7903356552124023


Processing epoch 01:  99%|█████████▉| 4796/4850 [23:01<00:15,  3.49it/s]

Epoch: 2, Loss: 2.4391229152679443


Processing epoch 01:  99%|█████████▉| 4797/4850 [23:01<00:15,  3.50it/s]

Epoch: 2, Loss: 2.6940577030181885


Processing epoch 01:  99%|█████████▉| 4798/4850 [23:02<00:14,  3.49it/s]

Epoch: 2, Loss: 2.6168017387390137


Processing epoch 01:  99%|█████████▉| 4799/4850 [23:02<00:14,  3.49it/s]

Epoch: 2, Loss: 2.293956995010376


Processing epoch 01:  99%|█████████▉| 4800/4850 [23:02<00:14,  3.50it/s]

Epoch: 2, Loss: 2.491882562637329


Processing epoch 01:  99%|█████████▉| 4801/4850 [23:02<00:14,  3.48it/s]

Epoch: 2, Loss: 2.519383430480957


Processing epoch 01:  99%|█████████▉| 4802/4850 [23:03<00:13,  3.51it/s]

Epoch: 2, Loss: 2.4644432067871094


Processing epoch 01:  99%|█████████▉| 4803/4850 [23:03<00:13,  3.45it/s]

Epoch: 2, Loss: 2.5632777214050293


Processing epoch 01:  99%|█████████▉| 4804/4850 [23:03<00:13,  3.47it/s]

Epoch: 2, Loss: 2.923011302947998


Processing epoch 01:  99%|█████████▉| 4805/4850 [23:04<00:12,  3.49it/s]

Epoch: 2, Loss: 2.3237972259521484


Processing epoch 01:  99%|█████████▉| 4806/4850 [23:04<00:12,  3.50it/s]

Epoch: 2, Loss: 2.7356886863708496


Processing epoch 01:  99%|█████████▉| 4807/4850 [23:04<00:12,  3.51it/s]

Epoch: 2, Loss: 2.95918607711792


Processing epoch 01:  99%|█████████▉| 4808/4850 [23:04<00:11,  3.51it/s]

Epoch: 2, Loss: 2.3829712867736816


Processing epoch 01:  99%|█████████▉| 4809/4850 [23:05<00:11,  3.48it/s]

Epoch: 2, Loss: 2.128509998321533


Processing epoch 01:  99%|█████████▉| 4810/4850 [23:05<00:11,  3.47it/s]

Epoch: 2, Loss: 2.2272138595581055


Processing epoch 01:  99%|█████████▉| 4811/4850 [23:05<00:11,  3.40it/s]

Epoch: 2, Loss: 2.2007687091827393


Processing epoch 01:  99%|█████████▉| 4812/4850 [23:06<00:11,  3.37it/s]

Epoch: 2, Loss: 2.4259581565856934


Processing epoch 01:  99%|█████████▉| 4813/4850 [23:06<00:11,  3.31it/s]

Epoch: 2, Loss: 2.475264310836792


Processing epoch 01:  99%|█████████▉| 4814/4850 [23:06<00:10,  3.31it/s]

Epoch: 2, Loss: 3.2850072383880615


Processing epoch 01:  99%|█████████▉| 4815/4850 [23:06<00:10,  3.29it/s]

Epoch: 2, Loss: 2.9832582473754883


Processing epoch 01:  99%|█████████▉| 4816/4850 [23:07<00:10,  3.32it/s]

Epoch: 2, Loss: 2.417006015777588


Processing epoch 01:  99%|█████████▉| 4817/4850 [23:07<00:09,  3.31it/s]

Epoch: 2, Loss: 2.341630458831787


Processing epoch 01:  99%|█████████▉| 4818/4850 [23:07<00:09,  3.29it/s]

Epoch: 2, Loss: 2.9186325073242188


Processing epoch 01:  99%|█████████▉| 4819/4850 [23:08<00:09,  3.32it/s]

Epoch: 2, Loss: 3.17708158493042


Processing epoch 01:  99%|█████████▉| 4820/4850 [23:08<00:09,  3.32it/s]

Epoch: 2, Loss: 3.156419277191162


Processing epoch 01:  99%|█████████▉| 4821/4850 [23:08<00:08,  3.32it/s]

Epoch: 2, Loss: 2.520110607147217


Processing epoch 01:  99%|█████████▉| 4822/4850 [23:09<00:08,  3.37it/s]

Epoch: 2, Loss: 2.2700133323669434


Processing epoch 01:  99%|█████████▉| 4823/4850 [23:09<00:07,  3.42it/s]

Epoch: 2, Loss: 2.859712600708008


Processing epoch 01:  99%|█████████▉| 4824/4850 [23:09<00:07,  3.39it/s]

Epoch: 2, Loss: 2.2452774047851562


Processing epoch 01:  99%|█████████▉| 4825/4850 [23:09<00:07,  3.36it/s]

Epoch: 2, Loss: 3.3350510597229004


Processing epoch 01: 100%|█████████▉| 4826/4850 [23:10<00:07,  3.40it/s]

Epoch: 2, Loss: 2.0097384452819824


Processing epoch 01: 100%|█████████▉| 4827/4850 [23:10<00:06,  3.43it/s]

Epoch: 2, Loss: 1.880462884902954


Processing epoch 01: 100%|█████████▉| 4828/4850 [23:10<00:06,  3.44it/s]

Epoch: 2, Loss: 2.4204869270324707


Processing epoch 01: 100%|█████████▉| 4829/4850 [23:11<00:06,  3.46it/s]

Epoch: 2, Loss: 2.011964797973633


Processing epoch 01: 100%|█████████▉| 4830/4850 [23:11<00:05,  3.46it/s]

Epoch: 2, Loss: 1.8684914112091064


Processing epoch 01: 100%|█████████▉| 4831/4850 [23:11<00:05,  3.46it/s]

Epoch: 2, Loss: 2.2613110542297363


Processing epoch 01: 100%|█████████▉| 4832/4850 [23:11<00:05,  3.46it/s]

Epoch: 2, Loss: 2.5020809173583984


Processing epoch 01: 100%|█████████▉| 4833/4850 [23:12<00:04,  3.46it/s]

Epoch: 2, Loss: 2.3193745613098145


Processing epoch 01: 100%|█████████▉| 4834/4850 [23:12<00:04,  3.51it/s]

Epoch: 2, Loss: 3.3423595428466797


Processing epoch 01: 100%|█████████▉| 4835/4850 [23:12<00:04,  3.48it/s]

Epoch: 2, Loss: 2.688417911529541


Processing epoch 01: 100%|█████████▉| 4836/4850 [23:13<00:04,  3.47it/s]

Epoch: 2, Loss: 2.3875646591186523


Processing epoch 01: 100%|█████████▉| 4837/4850 [23:13<00:03,  3.47it/s]

Epoch: 2, Loss: 2.0197205543518066


Processing epoch 01: 100%|█████████▉| 4838/4850 [23:13<00:03,  3.48it/s]

Epoch: 2, Loss: 2.2525088787078857


Processing epoch 01: 100%|█████████▉| 4839/4850 [23:13<00:03,  3.47it/s]

Epoch: 2, Loss: 2.1380183696746826


Processing epoch 01: 100%|█████████▉| 4840/4850 [23:14<00:02,  3.49it/s]

Epoch: 2, Loss: 2.550740957260132


Processing epoch 01: 100%|█████████▉| 4841/4850 [23:14<00:02,  3.50it/s]

Epoch: 2, Loss: 2.1866512298583984


Processing epoch 01: 100%|█████████▉| 4842/4850 [23:14<00:02,  3.50it/s]

Epoch: 2, Loss: 2.8002381324768066


Processing epoch 01: 100%|█████████▉| 4843/4850 [23:15<00:02,  3.50it/s]

Epoch: 2, Loss: 2.5572104454040527


Processing epoch 01: 100%|█████████▉| 4844/4850 [23:15<00:01,  3.49it/s]

Epoch: 2, Loss: 2.0367817878723145


Processing epoch 01: 100%|█████████▉| 4845/4850 [23:15<00:01,  3.50it/s]

Epoch: 2, Loss: 2.5998778343200684


Processing epoch 01: 100%|█████████▉| 4846/4850 [23:15<00:01,  3.52it/s]

Epoch: 2, Loss: 2.787538528442383


Processing epoch 01: 100%|█████████▉| 4847/4850 [23:16<00:00,  3.53it/s]

Epoch: 2, Loss: 2.704514980316162


Processing epoch 01: 100%|█████████▉| 4848/4850 [23:16<00:00,  3.54it/s]

Epoch: 2, Loss: 2.2814323902130127


Processing epoch 01: 100%|██████████| 4850/4850 [23:17<00:00,  3.47it/s]

Epoch: 2, Loss: 2.5184497833251953
Epoch: 2, Loss: 3.604671001434326



Processing epoch 02:   0%|          | 1/4850 [00:00<29:38,  2.73it/s]

Epoch: 3, Loss: 2.274916172027588


Processing epoch 02:   0%|          | 2/4850 [00:00<25:42,  3.14it/s]

Epoch: 3, Loss: 2.400212287902832


Processing epoch 02:   0%|          | 3/4850 [00:00<24:27,  3.30it/s]

Epoch: 3, Loss: 2.6323533058166504


Processing epoch 02:   0%|          | 4/4850 [00:01<23:55,  3.38it/s]

Epoch: 3, Loss: 2.28784441947937


Processing epoch 02:   0%|          | 5/4850 [00:01<23:33,  3.43it/s]

Epoch: 3, Loss: 2.279484272003174


Processing epoch 02:   0%|          | 6/4850 [00:01<23:28,  3.44it/s]

Epoch: 3, Loss: 2.9264259338378906


Processing epoch 02:   0%|          | 7/4850 [00:02<23:10,  3.48it/s]

Epoch: 3, Loss: 2.3492274284362793


Processing epoch 02:   0%|          | 8/4850 [00:02<23:10,  3.48it/s]

Epoch: 3, Loss: 2.306100368499756


Processing epoch 02:   0%|          | 9/4850 [00:02<23:05,  3.49it/s]

Epoch: 3, Loss: 2.1482317447662354


Processing epoch 02:   0%|          | 10/4850 [00:02<23:00,  3.51it/s]

Epoch: 3, Loss: 2.440037488937378


Processing epoch 02:   0%|          | 11/4850 [00:03<22:53,  3.52it/s]

Epoch: 3, Loss: 2.0087664127349854


Processing epoch 02:   0%|          | 12/4850 [00:03<22:48,  3.54it/s]

Epoch: 3, Loss: 2.5579307079315186


Processing epoch 02:   0%|          | 13/4850 [00:03<22:51,  3.53it/s]

Epoch: 3, Loss: 2.208049774169922


Processing epoch 02:   0%|          | 14/4850 [00:04<22:48,  3.53it/s]

Epoch: 3, Loss: 2.444805145263672


Processing epoch 02:   0%|          | 15/4850 [00:04<22:51,  3.52it/s]

Epoch: 3, Loss: 2.3806986808776855


Processing epoch 02:   0%|          | 16/4850 [00:04<22:59,  3.50it/s]

Epoch: 3, Loss: 1.9344327449798584


Processing epoch 02:   0%|          | 17/4850 [00:04<23:11,  3.47it/s]

Epoch: 3, Loss: 1.955716609954834


Processing epoch 02:   0%|          | 18/4850 [00:05<23:02,  3.50it/s]

Epoch: 3, Loss: 2.6632587909698486


Processing epoch 02:   0%|          | 19/4850 [00:05<23:20,  3.45it/s]

Epoch: 3, Loss: 2.4526805877685547


Processing epoch 02:   0%|          | 20/4850 [00:05<23:05,  3.49it/s]

Epoch: 3, Loss: 2.3219046592712402


Processing epoch 02:   0%|          | 21/4850 [00:06<23:03,  3.49it/s]

Epoch: 3, Loss: 2.525419235229492


Processing epoch 02:   0%|          | 22/4850 [00:06<23:08,  3.48it/s]

Epoch: 3, Loss: 2.1332311630249023


Processing epoch 02:   0%|          | 23/4850 [00:06<23:07,  3.48it/s]

Epoch: 3, Loss: 2.101836681365967


Processing epoch 02:   0%|          | 24/4850 [00:06<23:18,  3.45it/s]

Epoch: 3, Loss: 2.3043012619018555


Processing epoch 02:   1%|          | 25/4850 [00:07<23:19,  3.45it/s]

Epoch: 3, Loss: 2.533848524093628


Processing epoch 02:   1%|          | 26/4850 [00:07<23:11,  3.47it/s]

Epoch: 3, Loss: 3.318228244781494


Processing epoch 02:   1%|          | 27/4850 [00:07<22:56,  3.50it/s]

Epoch: 3, Loss: 2.8696470260620117


Processing epoch 02:   1%|          | 28/4850 [00:08<22:55,  3.50it/s]

Epoch: 3, Loss: 2.206300735473633


Processing epoch 02:   1%|          | 29/4850 [00:08<22:57,  3.50it/s]

Epoch: 3, Loss: 2.1911075115203857


Processing epoch 02:   1%|          | 30/4850 [00:08<22:58,  3.50it/s]

Epoch: 3, Loss: 3.078824281692505


Processing epoch 02:   1%|          | 31/4850 [00:08<22:53,  3.51it/s]

Epoch: 3, Loss: 2.5230727195739746


Processing epoch 02:   1%|          | 32/4850 [00:09<22:55,  3.50it/s]

Epoch: 3, Loss: 1.8982088565826416


Processing epoch 02:   1%|          | 33/4850 [00:09<22:54,  3.50it/s]

Epoch: 3, Loss: 2.264331340789795


Processing epoch 02:   1%|          | 34/4850 [00:09<22:55,  3.50it/s]

Epoch: 3, Loss: 2.1126928329467773


Processing epoch 02:   1%|          | 35/4850 [00:10<23:11,  3.46it/s]

Epoch: 3, Loss: 2.1701600551605225


Processing epoch 02:   1%|          | 36/4850 [00:10<23:25,  3.43it/s]

Epoch: 3, Loss: 1.9268509149551392


Processing epoch 02:   1%|          | 37/4850 [00:10<23:47,  3.37it/s]

Epoch: 3, Loss: 2.3257789611816406


Processing epoch 02:   1%|          | 38/4850 [00:10<23:33,  3.40it/s]

Epoch: 3, Loss: 2.3651509284973145


Processing epoch 02:   1%|          | 39/4850 [00:11<23:54,  3.35it/s]

Epoch: 3, Loss: 2.305004119873047


Processing epoch 02:   1%|          | 40/4850 [00:11<23:43,  3.38it/s]

Epoch: 3, Loss: 2.3129873275756836


Processing epoch 02:   1%|          | 41/4850 [00:11<23:48,  3.37it/s]

Epoch: 3, Loss: 2.070197582244873


Processing epoch 02:   1%|          | 42/4850 [00:12<23:27,  3.41it/s]

Epoch: 3, Loss: 2.2211368083953857


Processing epoch 02:   1%|          | 43/4850 [00:12<23:40,  3.38it/s]

Epoch: 3, Loss: 2.1497879028320312


Processing epoch 02:   1%|          | 44/4850 [00:12<23:55,  3.35it/s]

Epoch: 3, Loss: 2.4296889305114746


Processing epoch 02:   1%|          | 45/4850 [00:13<23:54,  3.35it/s]

Epoch: 3, Loss: 2.023132801055908


Processing epoch 02:   1%|          | 46/4850 [00:13<23:55,  3.35it/s]

Epoch: 3, Loss: 2.714034080505371


Processing epoch 02:   1%|          | 47/4850 [00:13<23:42,  3.38it/s]

Epoch: 3, Loss: 2.4948434829711914


Processing epoch 02:   1%|          | 48/4850 [00:13<23:45,  3.37it/s]

Epoch: 3, Loss: 2.4254872798919678


Processing epoch 02:   1%|          | 49/4850 [00:14<23:40,  3.38it/s]

Epoch: 3, Loss: 2.3888678550720215


Processing epoch 02:   1%|          | 50/4850 [00:14<23:51,  3.35it/s]

Epoch: 3, Loss: 3.051556348800659


Processing epoch 02:   1%|          | 51/4850 [00:14<23:33,  3.40it/s]

Epoch: 3, Loss: 2.2042596340179443


Processing epoch 02:   1%|          | 52/4850 [00:15<23:19,  3.43it/s]

Epoch: 3, Loss: 2.942215919494629


Processing epoch 02:   1%|          | 53/4850 [00:15<23:21,  3.42it/s]

Epoch: 3, Loss: 2.3841137886047363


Processing epoch 02:   1%|          | 54/4850 [00:15<23:15,  3.44it/s]

Epoch: 3, Loss: 2.329555034637451


Processing epoch 02:   1%|          | 55/4850 [00:15<23:08,  3.45it/s]

Epoch: 3, Loss: 2.273256778717041


Processing epoch 02:   1%|          | 56/4850 [00:16<23:13,  3.44it/s]

Epoch: 3, Loss: 2.3700385093688965


Processing epoch 02:   1%|          | 57/4850 [00:16<23:17,  3.43it/s]

Epoch: 3, Loss: 2.277560234069824





KeyboardInterrupt: 

In [141]:
def validate(transformer, criterion, val_dataloader, tokenizer_src, tokenizer_tgt):
    """Evaluate the weights stored in ``model.pth`` on ``val_dataloader``.

    For every batch the function runs a teacher-forced forward pass
    (``transformer(encoder_input, decoder_input)``), accumulates the loss,
    prints the decoded source / target / prediction, and counts correctly
    predicted target tokens. A summary line with the mean batch loss and the
    token accuracy is printed at the end.

    Args:
        transformer: Model called as ``transformer(encoder_input, decoder_input)``;
            its state is overwritten with the contents of ``model.pth``.
        criterion: Loss callable applied to flattened logits and flattened labels.
        val_dataloader: Iterable of dict batches with the keys
            ``"encoder_input"``, ``"decoder_input"`` and ``"label"``.
        tokenizer_src: Tokenizer used to decode the encoder input; must offer
            ``token_to_id`` and ``id_to_token``.
        tokenizer_tgt: Tokenizer used to decode labels and predictions and to
            look up the ``'[PAD]'`` id.

    Returns:
        None. Results are printed.

    Notes:
        Reads the module-level ``device`` and calls the module-level
        ``decode_sequence``. Model output is assumed to be
        ``(batch, seq_len, vocab)`` and labels ``(batch, seq_len)`` -- TODO confirm
        against the dataset / model cells.
    """
    # map_location lets a checkpoint saved on another device (e.g. GPU) load here.
    transformer.load_state_dict(torch.load("model.pth", map_location=device))
    transformer.eval()  # switch to evaluation mode

    pad_id = tokenizer_tgt.token_to_id('[PAD]')  # looked up once, not per batch
    total_loss = 0.0
    total_correct = 0
    total_predictions = 0
    num_batches = 0

    with torch.no_grad():
        for batch in val_dataloader:
            encoder_input = batch["encoder_input"].to(device)
            decoder_input = batch["decoder_input"].to(device)
            labels = batch["label"].to(device)

            output = transformer(encoder_input, decoder_input)
            # Take the vocabulary size from the logits instead of a notebook global.
            loss = criterion(output.reshape(-1, output.size(-1)), labels.reshape(-1))
            total_loss += loss.item()
            num_batches += 1

            predicted = output.argmax(dim=2)
            is_real_token = labels != pad_id

            # Decode target, expected, and predicted sequences.
            # "Target" and "Expected" are both the label tensor, so decode it once.
            decoded_target = decode_sequence(labels, tokenizer_tgt)
            decoded_expected = decoded_target
            # The model still emits tokens at padded positions; blank them out so
            # the printed prediction lines up with the target instead of trailing
            # on for the whole padded length.
            decoded_predicted = decode_sequence(
                predicted.masked_fill(~is_real_token, pad_id), tokenizer_tgt
            )
            source_input = decode_sequence(batch["encoder_input"], tokenizer_src)
            # Print target, expected, and predicted sequences
            print("Source:", source_input)
            print("Target:", decoded_target)
            print("Expected:", decoded_expected)
            print("Predicted:", decoded_predicted)
            print("***********************")

            # Accuracy is measured over real (non-PAD) target tokens only, in both
            # the numerator and the denominator.
            total_correct += ((predicted == labels) & is_real_token).sum().item()
            total_predictions += is_real_token.sum().item()

    # Guard against an empty dataloader / all-PAD labels instead of dividing by zero.
    avg_loss = total_loss / num_batches if num_batches else float("nan")
    accuracy = total_correct / total_predictions if total_predictions else float("nan")

    print(f"Validation Loss: {avg_loss}, Accuracy: {accuracy}")

def decode_sequence(sequence, tokenizer):
    """Turn a tensor of token ids into a space-separated token string.

    The tensor is flattened and read left to right. Decoding stops *before*
    the first ``'[PAD]'`` id and *after* the first ``'[EOS]'`` id (the
    ``[EOS]`` token itself is kept), whichever comes first. Stopping at
    ``[EOS]`` keeps model predictions, which never contain ``[PAD]``, from
    running on for the whole padded length.

    Args:
        sequence: Integer tensor of token ids; any shape, it is flattened.
        tokenizer: Object offering ``token_to_id(str)`` and ``id_to_token(int)``.

    Returns:
        str: The decoded tokens joined by single spaces ("" if nothing decodes).
    """
    # Resolve the special ids once instead of once per token.
    pad_id = tokenizer.token_to_id('[PAD]')
    eos_id = tokenizer.token_to_id('[EOS]')

    decoded_sequence = []
    # reshape(-1) also accepts non-contiguous tensors; tolist() moves the ids to
    # Python ints in one go rather than calling .item() per element.
    for token in sequence.reshape(-1).tolist():
        if token == pad_id:
            break
        decoded_sequence.append(tokenizer.id_to_token(token))
        if token == eos_id:
            break
    return " ".join(decoded_sequence)

# Run validate() on `val_dataloader`.
# Relies on objects created in earlier cells: `transformer`, `criterion`,
# `val_dataloader`, `tokenizer_src`, `tokenizer_tgt`, plus the module-level globals
# validate() reads (e.g. `device`). validate() reloads weights from "model.pth",
# so that checkpoint file must already exist.
validate(transformer, criterion, val_dataloader, tokenizer_src, tokenizer_tgt)


Source: [SOS] " Not at all : they are full of jests and gaiety ." [EOS]
Target: — No davvero , sono anzi [UNK] . [EOS]
Expected: — No davvero , sono anzi [UNK] . [EOS]
Predicted: — No davvero , sono anzi [UNK] . [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [EOS] [E

KeyboardInterrupt: 