In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchtext
from torchtext.legacy.data import Field, BucketIterator, Iterator
from torchtext.legacy import data
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import spacy
import numpy as np
import pandas as pd
import random
import math
import time
from tokenize import tokenize, untokenize
import io
import keyword
import torch.nn.functional as F
from tqdm import tqdm
import pickle

In [2]:
spacy.load('en_core_web_sm')

<spacy.lang.en.English at 0x17705083e20>

In [3]:
f = open("english_python_data.txt", "r", encoding="utf8")
file_lines = f.readlines()

In [4]:
dps = []
dp = None
for line in file_lines:
  if line[0] == "#":
    if dp:
      dp['solution'] = ''.join(dp['solution'])
      dps.append(dp)
    dp = {"question": None, "solution": []}
    dp['question'] = line[1:]
  else:
    dp["solution"].append(line)

Tokenizer

In [5]:
def augment_tokenize_python_code(python_code_str, mask_factor=0.3):
    var_dict = {}
    skip_list = ['range', 'enumerate', 'print', 'ord', 'int', 'float', 'zip', 'char', 'list', 'dict', 'tuple', 'set', 'len', 'sum', 'min', 'max']
    skip_list.extend(keyword.kwlist)

    var_counter = 1
    python_tokens = list(tokenize(io.BytesIO(python_code_str.encode('utf-8')).readline))
    tokenized_output = []

    for i in range(0, len(python_tokens)):
      if python_tokens[i].type == 1 and python_tokens[i].string not in skip_list:
        
        if i>0 and python_tokens[i-1].string in ['def', '.', 'import', 'raise', 'except', 'class']: # avoid masking modules, functions and error literals
          skip_list.append(python_tokens[i].string)
          tokenized_output.append((python_tokens[i].type, python_tokens[i].string))
        elif python_tokens[i].string in var_dict:  # if variable is already masked
          tokenized_output.append((python_tokens[i].type, var_dict[python_tokens[i].string]))
        elif random.uniform(0, 1) > 1-mask_factor: # randomly mask variables
          var_dict[python_tokens[i].string] = 'var_' + str(var_counter)
          var_counter+=1
          tokenized_output.append((python_tokens[i].type, var_dict[python_tokens[i].string]))
        else:
          skip_list.append(python_tokens[i].string)
          tokenized_output.append((python_tokens[i].type, python_tokens[i].string))
      
      else:
        tokenized_output.append((python_tokens[i].type, python_tokens[i].string))
    
    return tokenized_output

Building Train and Validation Dataset

In [6]:
python_problems_df = pd.DataFrame(dps)

In [7]:
np.random.seed(0)
msk = np.random.rand(len(python_problems_df)) < 0.85 # Splitting data into 85% train and 15% validation

train_df = python_problems_df[msk]
val_df   = python_problems_df[~msk]

Creating vocabulary using torchtext

In [8]:
SEED = 1234

random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [9]:
Input = data.Field(tokenize = 'spacy',init_token='', eos_token='', lower=True)
Output = data.Field(tokenize = augment_tokenize_python_code, init_token='', eos_token='', lower=False)



In [10]:
with open('Vocab/src_vocab.pkl', 'rb') as f:
    Input.vocab = pickle.load(f)

with open('Vocab/trg_vocab.pkl', 'rb') as f:
    Output.vocab = pickle.load(f)

In [11]:
fields = [('Input', Input),('Output', Output)]

In [12]:
# train_example = []
# val_example = []

# train_expansion_factor = 100
# for j in range(train_expansion_factor):
#   for i in range(train_df.shape[0]):
#       try:
#           ex = data.Example.fromlist([train_df.question[i], train_df.solution[i]], fields)
#           train_example.append(ex)
#       except:
#           pass

# for i in range(val_df.shape[0]):
#     try:
#         ex = data.Example.fromlist([val_df.question[i], val_df.solution[i]], fields)
#         val_example.append(ex)
#     except:
#         pass


# train_data = data.Dataset(train_example, fields)
# valid_data = data.Dataset(val_example, fields)


# Input.build_vocab(train_data, min_freq = 0)
# Output.build_vocab(train_data, min_freq = 0)


# def save_vocab(vocab, path):
#     import pickle
#     output = open(path, 'wb')
#     pickle.dump(vocab, output)
#     output.close()


# save_vocab(Input.vocab, "Vocab/src_vocab.pkl")
# save_vocab(Output.vocab, "Vocab/trg_vocab.pkl")

In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Transformer Architecture

In [14]:
class PositionwiseFeedforwardLayer(nn.Module):
    def __init__(self, hid_dim, pf_dim, dropout):
        super().__init__()
        self.fc_1    = nn.Linear(hid_dim, pf_dim)
        self.fc_2    = nn.Linear(pf_dim, hid_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        x = self.dropout(torch.relu(self.fc_1(x)))
        x = self.fc_2(x)
        return x

In [15]:
class MultiHeadAttentionLayer(nn.Module):
    def __init__(self, hid_dim, n_heads, dropout, device):
        super().__init__()
        
        assert hid_dim % n_heads == 0
        
        self.hid_dim = hid_dim
        self.n_heads = n_heads
        self.head_dim = hid_dim // n_heads
        
        self.fc_q    = nn.Linear(hid_dim, hid_dim)
        self.fc_k    = nn.Linear(hid_dim, hid_dim)
        self.fc_v    = nn.Linear(hid_dim, hid_dim)
        self.fc_o    = nn.Linear(hid_dim, hid_dim)
        self.dropout = nn.Dropout(dropout)
        self.scale   = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device)
        
    def forward(self, query, key, value, mask = None):
        
        batch_size = query.shape[0]
                
        Q = self.fc_q(query)
        K = self.fc_k(key)
        V = self.fc_v(value)
                
        Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
                
        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale
        
        if mask is not None:
            energy = energy.masked_fill(mask == 0, -1e10)
        
        attention = torch.softmax(energy, dim = -1)        
        x = torch.matmul(self.dropout(attention), V)
        x = x.permute(0, 2, 1, 3).contiguous()
        x = x.view(batch_size, -1, self.hid_dim)
        x = self.fc_o(x)
        
        return x, attention

In [16]:
class EncoderLayer(nn.Module):
    def __init__(self, hid_dim, n_heads, pf_dim, dropout, device):
        super().__init__()
        
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src, src_mask):
        _src, _ = self.self_attention(src, src, src, src_mask)
        src = self.self_attn_layer_norm(src + self.dropout(_src))
        _src = self.positionwise_feedforward(src)
        src = self.ff_layer_norm(src + self.dropout(_src))
        
        return src

In [17]:
class Encoder(nn.Module):
    def __init__(self, input_dim, hid_dim, n_layers, n_heads, pf_dim,dropout, device, max_length = 1000):
        super().__init__()

        self.device = device
        
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        self.layers        = nn.ModuleList([EncoderLayer(hid_dim, n_heads, pf_dim,dropout, device) for _ in range(n_layers)])
        self.dropout       = nn.Dropout(dropout)
        self.scale         = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
        
    def forward(self, src, src_mask):
        batch_size = src.shape[0]
        src_len = src.shape[1]

        pos = torch.arange(0, src_len).unsqueeze(0).repeat(batch_size, 1).to(self.device)
        src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos))
        
        for layer in self.layers:
            src = layer(src, src_mask)
            
        return src

In [18]:
class DecoderLayer(nn.Module):
    def __init__(self, hid_dim, n_heads, pf_dim, dropout, device):
        super().__init__()
        
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.enc_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
        _trg, _         = self.self_attention(trg, trg, trg, trg_mask)
        trg             = self.self_attn_layer_norm(trg + self.dropout(_trg))
        _trg, attention = self.encoder_attention(trg, enc_src, enc_src, src_mask)
        trg             = self.enc_attn_layer_norm(trg + self.dropout(_trg))
        _trg            = self.positionwise_feedforward(trg)
        trg             = self.ff_layer_norm(trg + self.dropout(_trg))
        
        return trg, attention

In [19]:
class Decoder(nn.Module):
    def __init__(self, output_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, device,max_length = 10000):
        super().__init__()
        
        self.device = device
        self.tok_embedding = nn.Embedding(output_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        self.layers  = nn.ModuleList([DecoderLayer(hid_dim, n_heads, pf_dim, dropout, device)for _ in range(n_layers)])
        self.fc_out  = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.scale   = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)
        
    def forward(self, trg, enc_src, trg_mask, src_mask):
                
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        pos = torch.arange(0, trg_len).unsqueeze(0).repeat(batch_size, 1).to(self.device)
        trg = self.dropout((self.tok_embedding(trg) * self.scale) + self.pos_embedding(pos))

        for layer in self.layers:
            trg, attention = layer(trg, enc_src, trg_mask, src_mask)
        
        output = self.fc_out(trg)
        return output, attention

In [20]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, src_pad_idx, trg_pad_idx, device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device
        
    def make_src_mask(self, src):
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
        return src_mask
    
    def make_trg_mask(self, trg):
        trg_pad_mask = (trg != self.trg_pad_idx).unsqueeze(1).unsqueeze(2)
        trg_len = trg.shape[1]
        trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device = self.device)).bool()
        trg_mask = trg_pad_mask & trg_sub_mask
        
        return trg_mask

    def forward(self, src, trg):    
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src  = self.encoder(src, src_mask)   
        output, attention = self.decoder(trg, enc_src, trg_mask, src_mask)
        
        return output, attention

Training Setup

In [21]:
INPUT_DIM = len(Input.vocab)
OUTPUT_DIM = len(Output.vocab)
HID_DIM = 256
ENC_LAYERS = 3
DEC_LAYERS = 3
ENC_HEADS = 16
DEC_HEADS = 16
ENC_PF_DIM = 512
DEC_PF_DIM = 512
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1

enc = Encoder(INPUT_DIM, HID_DIM, ENC_LAYERS, ENC_HEADS, ENC_PF_DIM, ENC_DROPOUT, device)
dec = Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS, DEC_PF_DIM, DEC_DROPOUT, device)

In [22]:
SRC_PAD_IDX = Input.vocab.stoi[Input.pad_token]
TRG_PAD_IDX = Output.vocab.stoi[Output.pad_token]

model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)

In [23]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 10,220,071 trainable parameters


In [24]:
def initialize_weights(m):
    if hasattr(m, 'weight') and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)

In [25]:
model.apply(initialize_weights);

In [26]:
LEARNING_RATE = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [27]:
class CrossEntropyLoss(nn.CrossEntropyLoss):
    """CrossEntropyLoss - with ability to recieve distrbution as targets, and optional label smoothing"""

    def __init__(self, weight=None, ignore_index=-100, reduction='mean', smooth_eps=None, smooth_dist=None, from_logits=True):
        super(CrossEntropyLoss, self).__init__(weight=weight, ignore_index=ignore_index, reduction=reduction)
        self.smooth_eps = smooth_eps
        self.smooth_dist = smooth_dist
        self.from_logits = from_logits

    def forward(self, input, target, smooth_dist=None):
        if smooth_dist is None:
            smooth_dist = self.smooth_dist
        return cross_entropy(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction, smooth_eps=self.smooth_eps, smooth_dist=smooth_dist, from_logits=self.from_logits)


def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction='mean',
                  smooth_eps=None, smooth_dist=None, from_logits=True):
    """cross entropy loss, with support for target distributions and label smoothing https://arxiv.org/abs/1512.00567"""
    smooth_eps = smooth_eps or 0

    if _is_long(target) and smooth_eps == 0:
        if from_logits:
            return F.cross_entropy(inputs, target, weight, ignore_index=ignore_index, reduction=reduction)
        else:
            return F.nll_loss(inputs, target, weight, ignore_index=ignore_index, reduction=reduction)

    if from_logits:
        lsm = F.log_softmax(inputs, dim=-1)
    else:
        lsm = inputs

    masked_indices = None
    num_classes = inputs.size(-1)

    if _is_long(target) and ignore_index >= 0:
        masked_indices = target.eq(ignore_index)

    if smooth_eps > 0 and smooth_dist is not None:
        if _is_long(target):
            target = onehot(target, num_classes).type_as(inputs)
        if smooth_dist.dim() < target.dim():
            smooth_dist = smooth_dist.unsqueeze(0)
        target.lerp_(smooth_dist, smooth_eps)

    if weight is not None:
        lsm = lsm * weight.unsqueeze(0)

    if _is_long(target):
        eps_sum = smooth_eps / num_classes
        eps_nll = 1. - eps_sum - smooth_eps
        likelihood = lsm.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
        loss = -(eps_nll * likelihood + eps_sum * lsm.sum(-1))
    else:
        loss = -(target * lsm).sum(-1)

    if masked_indices is not None:
        loss.masked_fill_(masked_indices, 0)

    if reduction == 'sum':
        loss = loss.sum()
    elif reduction == 'mean':
        if masked_indices is None:
            loss = loss.mean()
        else:
            loss = loss.sum() / float(loss.size(0) - masked_indices.sum())

    return loss


def onehot(indexes, N=None, ignore_index=None):
    if N is None:
        N = indexes.max() + 1
    sz = list(indexes.size())
    output = indexes.new().byte().resize_(*sz, N).zero_()
    output.scatter_(-1, indexes.unsqueeze(-1), 1)
    if ignore_index is not None and ignore_index >= 0:
        output.masked_fill_(indexes.eq(ignore_index).unsqueeze(-1), 0)
    return output


def _is_long(x):
    if hasattr(x, 'data'):
        x = x.data
    return isinstance(x, torch.LongTensor) or isinstance(x, torch.cuda.LongTensor)

In [28]:
def maskNLLLoss(inp, target, mask):
    nTotal = mask.sum()
    crossEntropy = CrossEntropyLoss(ignore_index = TRG_PAD_IDX, smooth_eps=0.20)
    loss = crossEntropy(inp, target)
    loss = loss.to(device)
    return loss, nTotal.item()

In [29]:
criterion = maskNLLLoss

TRAINING

In [30]:
def make_trg_mask(trg):
        trg_pad_mask = (trg != TRG_PAD_IDX).unsqueeze(1).unsqueeze(2)
        trg_len = trg.shape[1]
        trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device = device)).bool()
        trg_mask = trg_pad_mask & trg_sub_mask
        return trg_mask

def train(model, iterator, optimizer, criterion, clip):
    model.train()
    n_totals = 0
    print_losses = []
    for i, batch in tqdm(enumerate(iterator), total=len(iterator)):
        loss = 0
        src = batch.Input.permute(1, 0)
        trg = batch.Output.permute(1, 0)
        trg_mask = make_trg_mask(trg)
        optimizer.zero_grad()
        output, _ = model(src, trg[:,:-1])
        output_dim = output.shape[-1]
        output = output.contiguous().view(-1, output_dim)
        trg = trg[:,1:].contiguous().view(-1)   
        mask_loss, nTotal = criterion(output, trg, trg_mask)
        mask_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        print_losses.append(mask_loss.item() * nTotal)
        n_totals += nTotal
    return sum(print_losses) / n_totals

In [31]:
def evaluate(model, iterator, criterion):
    model.eval()
    n_totals = 0
    print_losses = []
    
    with torch.no_grad():
        for i, batch in tqdm(enumerate(iterator), total=len(iterator)):
            src = batch.Input.permute(1, 0)
            trg = batch.Output.permute(1, 0)
            trg_mask = make_trg_mask(trg)
            output, _ = model(src, trg[:,:-1])
            output_dim = output.shape[-1]
            output = output.contiguous().view(-1, output_dim)
            trg = trg[:,1:].contiguous().view(-1)
            mask_loss, nTotal = criterion(output, trg, trg_mask)
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    return sum(print_losses) / n_totals

In [32]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [33]:
N_EPOCHS = 1000
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_example = []
    val_example = []

    for i in range(train_df.shape[0]):
        try:
            ex = data.Example.fromlist([train_df.question[i], train_df.solution[i]], fields)
            train_example.append(ex)
        except:
            pass

    for i in range(val_df.shape[0]):
        try:
            ex = data.Example.fromlist([val_df.question[i], val_df.solution[i]], fields)
            val_example.append(ex)
        except:
            pass       

    train_data = data.Dataset(train_example, fields)
    valid_data = data.Dataset(val_example, fields)

    BATCH_SIZE = 16
    train_iterator, valid_iterator = BucketIterator.splits((train_data, valid_data), batch_size = BATCH_SIZE, sort_key = lambda x: len(x.Input), sort_within_batch=True, device = device)

    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'saved/model.pt')
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f}')

100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.73it/s]


Epoch: 01 | Time: 4m 1s
	Train Loss: 5.137
	 Val. Loss: 4.417


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 02 | Time: 3m 59s
	Train Loss: 4.207
	 Val. Loss: 4.169


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 03 | Time: 4m 0s
	Train Loss: 3.987
	 Val. Loss: 4.052


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 04 | Time: 4m 0s
	Train Loss: 3.833
	 Val. Loss: 4.005


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 05 | Time: 4m 1s
	Train Loss: 3.703
	 Val. Loss: 3.877


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 06 | Time: 3m 57s
	Train Loss: 3.598
	 Val. Loss: 3.836


100%|██████████| 222/222 [04:03<00:00,  1.10s/it]
100%|██████████| 8/8 [00:02<00:00,  2.72it/s]


Epoch: 07 | Time: 4m 8s
	Train Loss: 3.510
	 Val. Loss: 3.789


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 08 | Time: 3m 59s
	Train Loss: 3.432
	 Val. Loss: 3.752


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


Epoch: 09 | Time: 3m 59s
	Train Loss: 3.356
	 Val. Loss: 3.702


100%|██████████| 222/222 [04:00<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 10 | Time: 4m 4s
	Train Loss: 3.300
	 Val. Loss: 3.668


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 11 | Time: 4m 0s
	Train Loss: 3.243
	 Val. Loss: 3.650


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 12 | Time: 3m 59s
	Train Loss: 3.194
	 Val. Loss: 3.604


100%|██████████| 222/222 [03:59<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 13 | Time: 4m 3s
	Train Loss: 3.162
	 Val. Loss: 3.631


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 14 | Time: 4m 2s
	Train Loss: 3.129
	 Val. Loss: 3.616


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 15 | Time: 3m 59s
	Train Loss: 3.092
	 Val. Loss: 3.601


100%|██████████| 222/222 [04:02<00:00,  1.09s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 16 | Time: 4m 6s
	Train Loss: 3.064
	 Val. Loss: 3.619


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


Epoch: 17 | Time: 3m 54s
	Train Loss: 3.045
	 Val. Loss: 3.594


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 18 | Time: 4m 2s
	Train Loss: 3.021
	 Val. Loss: 3.598


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


Epoch: 19 | Time: 4m 0s
	Train Loss: 3.000
	 Val. Loss: 3.536


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 20 | Time: 3m 54s
	Train Loss: 2.981
	 Val. Loss: 3.566


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 21 | Time: 3m 59s
	Train Loss: 2.962
	 Val. Loss: 3.592


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 22 | Time: 3m 57s
	Train Loss: 2.945
	 Val. Loss: 3.539


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 23 | Time: 4m 1s
	Train Loss: 2.931
	 Val. Loss: 3.533


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 24 | Time: 3m 55s
	Train Loss: 2.923
	 Val. Loss: 3.521


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 25 | Time: 3m 56s
	Train Loss: 2.904
	 Val. Loss: 3.584


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 26 | Time: 3m 57s
	Train Loss: 2.893
	 Val. Loss: 3.519


100%|██████████| 222/222 [03:58<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 27 | Time: 4m 3s
	Train Loss: 2.882
	 Val. Loss: 3.526


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.75it/s]


Epoch: 28 | Time: 4m 0s
	Train Loss: 2.867
	 Val. Loss: 3.548


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.75it/s]


Epoch: 29 | Time: 4m 2s
	Train Loss: 2.860
	 Val. Loss: 3.547


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 30 | Time: 3m 54s
	Train Loss: 2.853
	 Val. Loss: 3.517


100%|██████████| 222/222 [03:48<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


Epoch: 31 | Time: 3m 53s
	Train Loss: 2.847
	 Val. Loss: 3.501


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.73it/s]


Epoch: 32 | Time: 4m 0s
	Train Loss: 2.842
	 Val. Loss: 3.495


100%|██████████| 222/222 [04:01<00:00,  1.09s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 33 | Time: 4m 6s
	Train Loss: 2.834
	 Val. Loss: 3.537


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 34 | Time: 4m 2s
	Train Loss: 2.822
	 Val. Loss: 3.551


100%|██████████| 222/222 [04:03<00:00,  1.10s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 35 | Time: 4m 8s
	Train Loss: 2.817
	 Val. Loss: 3.509


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 36 | Time: 3m 59s
	Train Loss: 2.807
	 Val. Loss: 3.541


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 37 | Time: 4m 1s
	Train Loss: 2.799
	 Val. Loss: 3.501


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 38 | Time: 3m 53s
	Train Loss: 2.793
	 Val. Loss: 3.537


100%|██████████| 222/222 [03:48<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 39 | Time: 3m 52s
	Train Loss: 2.787
	 Val. Loss: 3.541


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 40 | Time: 4m 1s
	Train Loss: 2.779
	 Val. Loss: 3.550


100%|██████████| 222/222 [03:48<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 41 | Time: 3m 53s
	Train Loss: 2.782
	 Val. Loss: 3.511


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 42 | Time: 3m 53s
	Train Loss: 2.768
	 Val. Loss: 3.544


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 43 | Time: 3m 57s
	Train Loss: 2.763
	 Val. Loss: 3.508


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 44 | Time: 3m 59s
	Train Loss: 2.760
	 Val. Loss: 3.518


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 45 | Time: 3m 56s
	Train Loss: 2.753
	 Val. Loss: 3.514


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.86it/s]


Epoch: 46 | Time: 3m 59s
	Train Loss: 2.750
	 Val. Loss: 3.519


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 47 | Time: 3m 56s
	Train Loss: 2.746
	 Val. Loss: 3.491


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 48 | Time: 3m 57s
	Train Loss: 2.737
	 Val. Loss: 3.522


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 49 | Time: 3m 55s
	Train Loss: 2.738
	 Val. Loss: 3.507


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 50 | Time: 4m 2s
	Train Loss: 2.726
	 Val. Loss: 3.516


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 51 | Time: 3m 57s
	Train Loss: 2.733
	 Val. Loss: 3.547


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 52 | Time: 3m 58s
	Train Loss: 2.724
	 Val. Loss: 3.549


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 53 | Time: 3m 57s
	Train Loss: 2.722
	 Val. Loss: 3.542


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 54 | Time: 3m 58s
	Train Loss: 2.716
	 Val. Loss: 3.499


100%|██████████| 222/222 [03:48<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.91it/s]


Epoch: 55 | Time: 3m 52s
	Train Loss: 2.712
	 Val. Loss: 3.510


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 56 | Time: 3m 57s
	Train Loss: 2.715
	 Val. Loss: 3.531


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 57 | Time: 4m 2s
	Train Loss: 2.701
	 Val. Loss: 3.532


100%|██████████| 222/222 [03:59<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 58 | Time: 4m 4s
	Train Loss: 2.705
	 Val. Loss: 3.540


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 59 | Time: 3m 59s
	Train Loss: 2.699
	 Val. Loss: 3.497


100%|██████████| 222/222 [03:59<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 60 | Time: 4m 3s
	Train Loss: 2.692
	 Val. Loss: 3.528


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 61 | Time: 3m 59s
	Train Loss: 2.695
	 Val. Loss: 3.526


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 62 | Time: 3m 58s
	Train Loss: 2.692
	 Val. Loss: 3.513


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 63 | Time: 4m 2s
	Train Loss: 2.685
	 Val. Loss: 3.514


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 64 | Time: 4m 2s
	Train Loss: 2.685
	 Val. Loss: 3.495


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 65 | Time: 3m 57s
	Train Loss: 2.685
	 Val. Loss: 3.504


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 66 | Time: 4m 2s
	Train Loss: 2.692
	 Val. Loss: 3.500


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 67 | Time: 4m 2s
	Train Loss: 2.687
	 Val. Loss: 3.505


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 68 | Time: 3m 59s
	Train Loss: 2.680
	 Val. Loss: 3.500


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 69 | Time: 3m 56s
	Train Loss: 2.678
	 Val. Loss: 3.525


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.93it/s]


Epoch: 70 | Time: 4m 0s
	Train Loss: 2.667
	 Val. Loss: 3.525


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 71 | Time: 3m 56s
	Train Loss: 2.670
	 Val. Loss: 3.488


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 72 | Time: 4m 1s
	Train Loss: 2.666
	 Val. Loss: 3.542


100%|██████████| 222/222 [03:49<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 73 | Time: 3m 54s
	Train Loss: 2.670
	 Val. Loss: 3.562


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 74 | Time: 3m 58s
	Train Loss: 2.665
	 Val. Loss: 3.510


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 75 | Time: 3m 59s
	Train Loss: 2.661
	 Val. Loss: 3.550


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 76 | Time: 3m 58s
	Train Loss: 2.664
	 Val. Loss: 3.495


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 77 | Time: 3m 59s
	Train Loss: 2.653
	 Val. Loss: 3.544


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 78 | Time: 3m 59s
	Train Loss: 2.651
	 Val. Loss: 3.539


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 79 | Time: 3m 57s
	Train Loss: 2.657
	 Val. Loss: 3.508


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 80 | Time: 4m 0s
	Train Loss: 2.646
	 Val. Loss: 3.534


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 81 | Time: 3m 59s
	Train Loss: 2.649
	 Val. Loss: 3.496


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 82 | Time: 4m 2s
	Train Loss: 2.640
	 Val. Loss: 3.495


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 83 | Time: 3m 56s
	Train Loss: 2.650
	 Val. Loss: 3.533


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 84 | Time: 3m 57s
	Train Loss: 2.641
	 Val. Loss: 3.524


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 85 | Time: 3m 56s
	Train Loss: 2.641
	 Val. Loss: 3.502


100%|██████████| 222/222 [03:44<00:00,  1.01s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 86 | Time: 3m 49s
	Train Loss: 2.643
	 Val. Loss: 3.539


100%|██████████| 222/222 [03:54<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 87 | Time: 3m 58s
	Train Loss: 2.637
	 Val. Loss: 3.533


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 88 | Time: 3m 57s
	Train Loss: 2.635
	 Val. Loss: 3.491


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.87it/s]


Epoch: 89 | Time: 3m 57s
	Train Loss: 2.634
	 Val. Loss: 3.508


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 90 | Time: 3m 56s
	Train Loss: 2.635
	 Val. Loss: 3.552


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 91 | Time: 3m 59s
	Train Loss: 2.628
	 Val. Loss: 3.517


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 92 | Time: 4m 0s
	Train Loss: 2.637
	 Val. Loss: 3.533


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.95it/s]


Epoch: 93 | Time: 3m 57s
	Train Loss: 2.629
	 Val. Loss: 3.515


100%|██████████| 222/222 [03:49<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 94 | Time: 3m 54s
	Train Loss: 2.622
	 Val. Loss: 3.546


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 95 | Time: 3m 59s
	Train Loss: 2.627
	 Val. Loss: 3.579


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 96 | Time: 3m 58s
	Train Loss: 2.622
	 Val. Loss: 3.506


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


Epoch: 97 | Time: 3m 57s
	Train Loss: 2.624
	 Val. Loss: 3.537


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 98 | Time: 4m 2s
	Train Loss: 2.623
	 Val. Loss: 3.529


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 99 | Time: 3m 53s
	Train Loss: 2.620
	 Val. Loss: 3.498


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 100 | Time: 3m 58s
	Train Loss: 2.620
	 Val. Loss: 3.510


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 101 | Time: 3m 57s
	Train Loss: 2.625
	 Val. Loss: 3.493


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 102 | Time: 4m 0s
	Train Loss: 2.622
	 Val. Loss: 3.528


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.86it/s]


Epoch: 103 | Time: 4m 0s
	Train Loss: 2.616
	 Val. Loss: 3.539


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 104 | Time: 4m 0s
	Train Loss: 2.618
	 Val. Loss: 3.557


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 105 | Time: 3m 56s
	Train Loss: 2.612
	 Val. Loss: 3.549


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 106 | Time: 3m 57s
	Train Loss: 2.610
	 Val. Loss: 3.569


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 107 | Time: 3m 56s
	Train Loss: 2.617
	 Val. Loss: 3.490


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 108 | Time: 3m 56s
	Train Loss: 2.613
	 Val. Loss: 3.519


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.90it/s]


Epoch: 109 | Time: 3m 53s
	Train Loss: 2.612
	 Val. Loss: 3.532


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 110 | Time: 3m 57s
	Train Loss: 2.609
	 Val. Loss: 3.528


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 111 | Time: 3m 59s
	Train Loss: 2.606
	 Val. Loss: 3.535


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 112 | Time: 4m 0s
	Train Loss: 2.605
	 Val. Loss: 3.540


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 113 | Time: 3m 59s
	Train Loss: 2.610
	 Val. Loss: 3.555


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 114 | Time: 4m 1s
	Train Loss: 2.601
	 Val. Loss: 3.537


100%|██████████| 222/222 [04:00<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 115 | Time: 4m 4s
	Train Loss: 2.600
	 Val. Loss: 3.537


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 116 | Time: 4m 0s
	Train Loss: 2.605
	 Val. Loss: 3.514


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 117 | Time: 3m 56s
	Train Loss: 2.600
	 Val. Loss: 3.538


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 118 | Time: 4m 0s
	Train Loss: 2.599
	 Val. Loss: 3.535


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 119 | Time: 3m 56s
	Train Loss: 2.601
	 Val. Loss: 3.525


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 120 | Time: 3m 55s
	Train Loss: 2.598
	 Val. Loss: 3.538


100%|██████████| 222/222 [03:49<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 121 | Time: 3m 54s
	Train Loss: 2.599
	 Val. Loss: 3.533


100%|██████████| 222/222 [04:01<00:00,  1.09s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 122 | Time: 4m 5s
	Train Loss: 2.594
	 Val. Loss: 3.515


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 123 | Time: 3m 57s
	Train Loss: 2.596
	 Val. Loss: 3.516


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 124 | Time: 3m 59s
	Train Loss: 2.598
	 Val. Loss: 3.511


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.87it/s]


Epoch: 125 | Time: 3m 58s
	Train Loss: 2.593
	 Val. Loss: 3.523


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 126 | Time: 4m 0s
	Train Loss: 2.599
	 Val. Loss: 3.521


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 127 | Time: 3m 56s
	Train Loss: 2.594
	 Val. Loss: 3.545


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 128 | Time: 3m 55s
	Train Loss: 2.589
	 Val. Loss: 3.540


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 129 | Time: 3m 55s
	Train Loss: 2.592
	 Val. Loss: 3.531


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 130 | Time: 3m 55s
	Train Loss: 2.589
	 Val. Loss: 3.548


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 131 | Time: 4m 1s
	Train Loss: 2.589
	 Val. Loss: 3.557


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 132 | Time: 4m 2s
	Train Loss: 2.587
	 Val. Loss: 3.543


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 133 | Time: 3m 56s
	Train Loss: 2.588
	 Val. Loss: 3.538


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 134 | Time: 3m 55s
	Train Loss: 2.589
	 Val. Loss: 3.547


100%|██████████| 222/222 [03:46<00:00,  1.02s/it]
100%|██████████| 8/8 [00:02<00:00,  2.75it/s]


Epoch: 135 | Time: 3m 50s
	Train Loss: 2.589
	 Val. Loss: 3.546


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 136 | Time: 4m 2s
	Train Loss: 2.587
	 Val. Loss: 3.552


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 137 | Time: 4m 1s
	Train Loss: 2.578
	 Val. Loss: 3.533


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 138 | Time: 3m 59s
	Train Loss: 2.581
	 Val. Loss: 3.588


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 139 | Time: 3m 56s
	Train Loss: 2.581
	 Val. Loss: 3.558


100%|██████████| 222/222 [03:54<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 140 | Time: 3m 58s
	Train Loss: 2.587
	 Val. Loss: 3.535


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 141 | Time: 3m 57s
	Train Loss: 2.582
	 Val. Loss: 3.534


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 142 | Time: 4m 0s
	Train Loss: 2.584
	 Val. Loss: 3.532


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 143 | Time: 4m 0s
	Train Loss: 2.578
	 Val. Loss: 3.563


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 144 | Time: 4m 2s
	Train Loss: 2.579
	 Val. Loss: 3.557


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 145 | Time: 3m 59s
	Train Loss: 2.578
	 Val. Loss: 3.538


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 146 | Time: 3m 58s
	Train Loss: 2.583
	 Val. Loss: 3.573


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 147 | Time: 3m 57s
	Train Loss: 2.579
	 Val. Loss: 3.533


100%|██████████| 222/222 [03:59<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 148 | Time: 4m 4s
	Train Loss: 2.582
	 Val. Loss: 3.570


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 149 | Time: 3m 55s
	Train Loss: 2.584
	 Val. Loss: 3.586


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 150 | Time: 3m 59s
	Train Loss: 2.578
	 Val. Loss: 3.519


100%|██████████| 222/222 [03:54<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 151 | Time: 3m 58s
	Train Loss: 2.575
	 Val. Loss: 3.527


100%|██████████| 222/222 [03:54<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 152 | Time: 3m 58s
	Train Loss: 2.575
	 Val. Loss: 3.548


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 153 | Time: 4m 0s
	Train Loss: 2.579
	 Val. Loss: 3.540


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 154 | Time: 3m 58s
	Train Loss: 2.578
	 Val. Loss: 3.532


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 155 | Time: 3m 56s
	Train Loss: 2.574
	 Val. Loss: 3.514


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 156 | Time: 4m 0s
	Train Loss: 2.571
	 Val. Loss: 3.547


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 157 | Time: 4m 2s
	Train Loss: 2.579
	 Val. Loss: 3.526


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 158 | Time: 4m 1s
	Train Loss: 2.572
	 Val. Loss: 3.557


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 159 | Time: 3m 56s
	Train Loss: 2.573
	 Val. Loss: 3.556


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 160 | Time: 4m 0s
	Train Loss: 2.572
	 Val. Loss: 3.549


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 161 | Time: 3m 57s
	Train Loss: 2.572
	 Val. Loss: 3.567


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 162 | Time: 3m 58s
	Train Loss: 2.569
	 Val. Loss: 3.546


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 163 | Time: 3m 54s
	Train Loss: 2.572
	 Val. Loss: 3.561


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 164 | Time: 3m 58s
	Train Loss: 2.572
	 Val. Loss: 3.544


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 165 | Time: 3m 56s
	Train Loss: 2.574
	 Val. Loss: 3.546


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 166 | Time: 3m 54s
	Train Loss: 2.573
	 Val. Loss: 3.539


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 167 | Time: 3m 57s
	Train Loss: 2.571
	 Val. Loss: 3.544


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.87it/s]


Epoch: 168 | Time: 3m 59s
	Train Loss: 2.576
	 Val. Loss: 3.515


100%|██████████| 222/222 [03:48<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 169 | Time: 3m 52s
	Train Loss: 2.567
	 Val. Loss: 3.532


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 170 | Time: 3m 59s
	Train Loss: 2.566
	 Val. Loss: 3.543


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 171 | Time: 4m 1s
	Train Loss: 2.567
	 Val. Loss: 3.545


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 172 | Time: 3m 58s
	Train Loss: 2.572
	 Val. Loss: 3.532


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 173 | Time: 3m 58s
	Train Loss: 2.565
	 Val. Loss: 3.538


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 174 | Time: 3m 56s
	Train Loss: 2.566
	 Val. Loss: 3.559


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 175 | Time: 4m 3s
	Train Loss: 2.569
	 Val. Loss: 3.526


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 176 | Time: 4m 0s
	Train Loss: 2.562
	 Val. Loss: 3.559


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 177 | Time: 3m 57s
	Train Loss: 2.568
	 Val. Loss: 3.587


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 178 | Time: 3m 58s
	Train Loss: 2.568
	 Val. Loss: 3.558


100%|██████████| 222/222 [03:59<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 179 | Time: 4m 3s
	Train Loss: 2.565
	 Val. Loss: 3.559


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 180 | Time: 3m 56s
	Train Loss: 2.570
	 Val. Loss: 3.539


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 181 | Time: 4m 1s
	Train Loss: 2.561
	 Val. Loss: 3.578


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


Epoch: 182 | Time: 4m 1s
	Train Loss: 2.567
	 Val. Loss: 3.527


100%|██████████| 222/222 [03:58<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 183 | Time: 4m 3s
	Train Loss: 2.563
	 Val. Loss: 3.549


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 184 | Time: 4m 0s
	Train Loss: 2.566
	 Val. Loss: 3.544


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 185 | Time: 3m 57s
	Train Loss: 2.562
	 Val. Loss: 3.552


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 186 | Time: 4m 2s
	Train Loss: 2.564
	 Val. Loss: 3.577


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 187 | Time: 3m 56s
	Train Loss: 2.565
	 Val. Loss: 3.513


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 188 | Time: 3m 57s
	Train Loss: 2.561
	 Val. Loss: 3.564


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 189 | Time: 3m 59s
	Train Loss: 2.559
	 Val. Loss: 3.546


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 190 | Time: 3m 56s
	Train Loss: 2.565
	 Val. Loss: 3.547


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 191 | Time: 4m 2s
	Train Loss: 2.560
	 Val. Loss: 3.556


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 192 | Time: 4m 1s
	Train Loss: 2.558
	 Val. Loss: 3.571


100%|██████████| 222/222 [03:47<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 193 | Time: 3m 52s
	Train Loss: 2.555
	 Val. Loss: 3.558


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 194 | Time: 3m 54s
	Train Loss: 2.561
	 Val. Loss: 3.573


100%|██████████| 222/222 [03:59<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 195 | Time: 4m 4s
	Train Loss: 2.561
	 Val. Loss: 3.573


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.73it/s]


Epoch: 196 | Time: 4m 1s
	Train Loss: 2.560
	 Val. Loss: 3.558


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 197 | Time: 4m 1s
	Train Loss: 2.557
	 Val. Loss: 3.519


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 198 | Time: 3m 58s
	Train Loss: 2.558
	 Val. Loss: 3.529


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 199 | Time: 3m 57s
	Train Loss: 2.555
	 Val. Loss: 3.568


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 200 | Time: 3m 58s
	Train Loss: 2.550
	 Val. Loss: 3.552


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 201 | Time: 3m 58s
	Train Loss: 2.556
	 Val. Loss: 3.545


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 202 | Time: 3m 59s
	Train Loss: 2.557
	 Val. Loss: 3.550


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 203 | Time: 3m 56s
	Train Loss: 2.557
	 Val. Loss: 3.549


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 204 | Time: 3m 58s
	Train Loss: 2.553
	 Val. Loss: 3.566


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 205 | Time: 4m 0s
	Train Loss: 2.556
	 Val. Loss: 3.532


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.87it/s]


Epoch: 206 | Time: 4m 0s
	Train Loss: 2.555
	 Val. Loss: 3.536


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 207 | Time: 4m 1s
	Train Loss: 2.554
	 Val. Loss: 3.546


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 208 | Time: 3m 57s
	Train Loss: 2.556
	 Val. Loss: 3.539


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 209 | Time: 3m 59s
	Train Loss: 2.552
	 Val. Loss: 3.538


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 210 | Time: 4m 2s
	Train Loss: 2.552
	 Val. Loss: 3.517


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 211 | Time: 4m 1s
	Train Loss: 2.554
	 Val. Loss: 3.553


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 212 | Time: 4m 0s
	Train Loss: 2.557
	 Val. Loss: 3.544


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 213 | Time: 3m 58s
	Train Loss: 2.555
	 Val. Loss: 3.568


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.71it/s]


Epoch: 214 | Time: 4m 1s
	Train Loss: 2.557
	 Val. Loss: 3.544


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 215 | Time: 3m 54s
	Train Loss: 2.553
	 Val. Loss: 3.562


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 216 | Time: 3m 57s
	Train Loss: 2.555
	 Val. Loss: 3.561


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 217 | Time: 3m 56s
	Train Loss: 2.551
	 Val. Loss: 3.562


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.88it/s]


Epoch: 218 | Time: 4m 1s
	Train Loss: 2.552
	 Val. Loss: 3.559


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 219 | Time: 4m 0s
	Train Loss: 2.552
	 Val. Loss: 3.560


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 220 | Time: 3m 56s
	Train Loss: 2.553
	 Val. Loss: 3.575


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 221 | Time: 3m 57s
	Train Loss: 2.551
	 Val. Loss: 3.558


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.86it/s]


Epoch: 222 | Time: 4m 0s
	Train Loss: 2.553
	 Val. Loss: 3.534


100%|██████████| 222/222 [04:00<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 223 | Time: 4m 4s
	Train Loss: 2.549
	 Val. Loss: 3.566


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 224 | Time: 3m 59s
	Train Loss: 2.549
	 Val. Loss: 3.546


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 225 | Time: 4m 2s
	Train Loss: 2.549
	 Val. Loss: 3.568


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 226 | Time: 4m 2s
	Train Loss: 2.552
	 Val. Loss: 3.577


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 227 | Time: 4m 2s
	Train Loss: 2.548
	 Val. Loss: 3.595


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 228 | Time: 4m 2s
	Train Loss: 2.549
	 Val. Loss: 3.548


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 229 | Time: 3m 54s
	Train Loss: 2.552
	 Val. Loss: 3.554


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 230 | Time: 3m 59s
	Train Loss: 2.550
	 Val. Loss: 3.562


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 231 | Time: 3m 54s
	Train Loss: 2.549
	 Val. Loss: 3.574


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 232 | Time: 3m 57s
	Train Loss: 2.544
	 Val. Loss: 3.553


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 233 | Time: 4m 1s
	Train Loss: 2.547
	 Val. Loss: 3.573


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 234 | Time: 4m 1s
	Train Loss: 2.546
	 Val. Loss: 3.571


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.74it/s]


Epoch: 235 | Time: 3m 59s
	Train Loss: 2.553
	 Val. Loss: 3.551


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 236 | Time: 3m 58s
	Train Loss: 2.549
	 Val. Loss: 3.587


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 237 | Time: 3m 59s
	Train Loss: 2.546
	 Val. Loss: 3.581


100%|██████████| 222/222 [03:56<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 238 | Time: 4m 0s
	Train Loss: 2.549
	 Val. Loss: 3.580


100%|██████████| 222/222 [03:54<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 239 | Time: 3m 59s
	Train Loss: 2.544
	 Val. Loss: 3.556


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 240 | Time: 3m 59s
	Train Loss: 2.545
	 Val. Loss: 3.553


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.93it/s]


Epoch: 241 | Time: 3m 53s
	Train Loss: 2.544
	 Val. Loss: 3.572


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 242 | Time: 3m 56s
	Train Loss: 2.545
	 Val. Loss: 3.522


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 243 | Time: 3m 56s
	Train Loss: 2.545
	 Val. Loss: 3.572


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 244 | Time: 3m 57s
	Train Loss: 2.548
	 Val. Loss: 3.548


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.69it/s]


Epoch: 245 | Time: 3m 54s
	Train Loss: 2.547
	 Val. Loss: 3.591


100%|██████████| 222/222 [03:55<00:00,  1.06s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 246 | Time: 3m 59s
	Train Loss: 2.545
	 Val. Loss: 3.548


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.82it/s]


Epoch: 247 | Time: 4m 2s
	Train Loss: 2.544
	 Val. Loss: 3.551


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.76it/s]


Epoch: 248 | Time: 3m 57s
	Train Loss: 2.543
	 Val. Loss: 3.584


100%|██████████| 222/222 [03:57<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.78it/s]


Epoch: 249 | Time: 4m 1s
	Train Loss: 2.544
	 Val. Loss: 3.590


100%|██████████| 222/222 [03:54<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.75it/s]


Epoch: 250 | Time: 3m 58s
	Train Loss: 2.543
	 Val. Loss: 3.552


100%|██████████| 222/222 [03:56<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 251 | Time: 4m 0s
	Train Loss: 2.545
	 Val. Loss: 3.543


100%|██████████| 222/222 [03:58<00:00,  1.08s/it]
100%|██████████| 8/8 [00:02<00:00,  2.88it/s]


Epoch: 252 | Time: 4m 3s
	Train Loss: 2.544
	 Val. Loss: 3.576


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 253 | Time: 3m 56s
	Train Loss: 2.542
	 Val. Loss: 3.564


100%|██████████| 222/222 [03:52<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 254 | Time: 3m 57s
	Train Loss: 2.537
	 Val. Loss: 3.591


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.83it/s]


Epoch: 255 | Time: 3m 56s
	Train Loss: 2.541
	 Val. Loss: 3.579


100%|██████████| 222/222 [03:54<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.87it/s]


Epoch: 256 | Time: 3m 58s
	Train Loss: 2.542
	 Val. Loss: 3.545


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 257 | Time: 3m 54s
	Train Loss: 2.543
	 Val. Loss: 3.576


100%|██████████| 222/222 [03:49<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 258 | Time: 3m 54s
	Train Loss: 2.544
	 Val. Loss: 3.573


100%|██████████| 222/222 [03:53<00:00,  1.05s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 259 | Time: 3m 57s
	Train Loss: 2.538
	 Val. Loss: 3.602


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.84it/s]


Epoch: 260 | Time: 3m 55s
	Train Loss: 2.544
	 Val. Loss: 3.588


100%|██████████| 222/222 [03:48<00:00,  1.03s/it]
100%|██████████| 8/8 [00:02<00:00,  2.88it/s]


Epoch: 261 | Time: 3m 52s
	Train Loss: 2.538
	 Val. Loss: 3.578


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.77it/s]


Epoch: 262 | Time: 3m 54s
	Train Loss: 2.546
	 Val. Loss: 3.563


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.81it/s]


Epoch: 263 | Time: 3m 55s
	Train Loss: 2.541
	 Val. Loss: 3.573


100%|██████████| 222/222 [03:51<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.85it/s]


Epoch: 264 | Time: 3m 55s
	Train Loss: 2.540
	 Val. Loss: 3.559


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.80it/s]


Epoch: 265 | Time: 3m 55s
	Train Loss: 2.540
	 Val. Loss: 3.563


100%|██████████| 222/222 [03:50<00:00,  1.04s/it]
100%|██████████| 8/8 [00:02<00:00,  2.79it/s]


Epoch: 266 | Time: 3m 54s
	Train Loss: 2.536
	 Val. Loss: 3.573


100%|██████████| 222/222 [03:58<00:00,  1.07s/it]
100%|██████████| 8/8 [00:02<00:00,  2.87it/s]


Epoch: 267 | Time: 4m 2s
	Train Loss: 2.536
	 Val. Loss: 3.588


 97%|█████████▋| 216/222 [03:51<00:06,  1.07s/it]


KeyboardInterrupt: 