# Import

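These packages are required if you launch this notebook from a SageMaker instance. The install commands in the cell below are wrapped in a string, so they are not executed as-is; remove the surrounding triple quotes to run them.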

In [1]:
"""!pip install mlflow
!pip install pytorch-lightning
!pip install transformers
!pip install tqdm
!pip install sagemaker

!pip install s3fs
!pip install smdebug"""

'!pip install mlflow\n!pip install pytorch-lightning\n!pip install transformers\n!pip install tqdm\n!pip install sagemaker\n\n!pip install s3fs\n!pip install smdebug'

In [2]:
import sys
# Put the directory three levels above the working directory on the module
# search path. NOTE(review): presumably this is what makes the project-local
# `deep` package importable further down - confirm against the repository layout.
sys.path.append('../../../')

import os
import sys
import logging
import argparse
from pathlib import Path
from typing import Any, Dict, Optional

In [3]:
from tqdm.auto import tqdm

import torchmetrics
from torchmetrics.functional import accuracy, f1, auroc

import sagemaker
from sagemaker import get_execution_role
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.core.decorators import auto_move_data
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import MLFlowLogger


import matplotlib.pyplot as plt
from pylab import rcParams
from matplotlib import rc

import pandas as pd

Local constants covering the data, the MLflow server, paths, etc. are imported below: use them.

In [None]:
from deep.constants import *
from deep.utils import *

In [5]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Data

You can use whatever data you want. We recommend loading it as a `pandas` DataFrame.

In [6]:
# Data folder: three levels up, under data/frameworks_data/data_v0.7.1
# (relative to the working directory).
DATA_PATH = os.path.join('..', '..', '..', 'data', 'frameworks_data', 'data_v0.7.1')

train_val_csv = os.path.join(DATA_PATH, 'new_columns_train_val.csv')
test_csv = os.path.join(DATA_PATH, 'new_columns_test_v0.7.1.csv')

# Exact duplicate rows are dropped from the train/validation data only;
# the test data is loaded unchanged.
tot_df = pd.read_csv(train_val_csv).drop_duplicates()
test_df = pd.read_csv(test_csv)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [7]:
# Columns kept in both frames. Every entry from index 2 onward is later
# joined into the `training_names` hyperparameter, so the order matters.
columns = [
    'excerpt',
    'entry_id',
    'sectors',
    'present_prim_tags',
    'pillars_2d',
    'pillars_1d',
    'impact_capresp_humcond',
    'need_intervention_risk',
    'context_covid',
    'displacement_shockevent',
    'access_infcom_casualities',
]

# Restrict both frames to exactly these columns, in this order.
tot_df, test_df = (frame[columns] for frame in (tot_df, test_df))

## Sagemaker Prep

### Session

Configure SageMaker

In [8]:
# SageMaker session that uses the development bucket as its default bucket.
sess = sagemaker.Session(default_bucket=DEV_BUCKET.name)

# Execution role, its ARN and the MLflow tracking server.
# NOTE(review): these constants presumably come from the `deep` star-imports - confirm.
role, role_arn, tracking_uri = SAGEMAKER_ROLE, SAGEMAKER_ROLE_ARN, MLFLOW_SERVER

### Bucket upload

You need to upload data to an S3 bucket. 




In [9]:
# Echo the MLflow tracking server URL as the cell output.
MLFLOW_SERVER

'http://mlflow-deep-387470f3-1883319727.us-east-1.elb.amazonaws.com/'

In [10]:
# Set `sample = True` to train on a random subset and make the computations faster.
sample = False
SAMPLE_SIZE = 20_000  # maximum number of training rows kept when sampling
SAMPLE_SEED = 42      # fixed seed so the sampled subset is reproducible across runs

if sample:
    # Cap `n` at the frame length: DataFrame.sample raises ValueError when `n`
    # exceeds the number of rows and replace=False (the default).
    tot_df = tot_df.sample(
        n=min(SAMPLE_SIZE, len(tot_df)),
        random_state=SAMPLE_SEED,
    )

job_name = f"pytorch-{formatted_time()}-all-models"  # change it as you prefer
input_path = DEV_BUCKET / 'training' / 'input_data' / job_name  # Do not change this

train_path = str(input_path / 'train.pickle')
val_path = str(input_path / 'val.pickle')

# protocol 4 is necessary, since SageMaker uses python 3.6
# NOTE(review): the *test* frame is what gets written to val.pickle - confirm
# that using the test set as the validation set is intended.
tot_df.to_pickle(train_path, protocol=4)
test_df.to_pickle(val_path, protocol=4)

### Estimator Definition

In [11]:
# GPU instance types
instances = ['ml.p2.xlarge', 'ml.p3.2xlarge']

The hyperparameters are passed as command line arguments to the training script. 

You can add/change them as you like. It's important to keep the `tracking_uri` and the `experiment_name` which are used by MLFlow.

The class `PyTorch` is part of the `SageMaker` python API. The parameters are important and you should probably not change most of them. The ones you may want to change are:

- `instance_type`, specify the instance you want
- `source_dir`, specify your script directory. Try to use global variables as much as possible

In [12]:
from sagemaker.pytorch import PyTorch

# Instance the training job runs on; the same value is also forwarded to the
# training script through the hyperparameters.
instance_type = 'ml.p3.2xlarge'

# Passed to the training script as command-line arguments.
# `tracking_uri` and `experiment_name` are the ones used by MLflow.
hyperparameters = dict(
    tracking_uri=MLFLOW_SERVER,
    experiment_name="pl-all-models-experiments",
    max_len=512,
    epochs=6,
    model_name='microsoft/xtremedistil-l6-h256-uncased',
    tokenizer_name='microsoft/xtremedistil-l6-h256-uncased',
    output_length=256,
    # every entry of `columns` from index 2 onward, comma-separated
    training_names=','.join(columns[2:]),
    instance_type=instance_type,
    beta_f1=0.8,
    nb_repetitions=2,
)

# NOTE(review): `job_name` is passed here and again to `fit`; confirm the
# estimator constructor actually consumes it.
estimator = PyTorch(
    role=role,
    instance_type=instance_type,
    instance_count=1,
    framework_version="1.8",
    py_version="py36",
    entry_point='train_mlflow.py',
    source_dir='../../../scripts/training/selim/multiclass-lightning',
    code_location=str(input_path),
    output_path=str(DEV_BUCKET / 'models/'),
    hyperparameters=hyperparameters,
    job_name=job_name,
)

In [13]:
# Both input channels point at the same location, the one train.pickle and
# val.pickle were written to.
fit_arguments = {channel: str(input_path) for channel in ('train', 'test')}

In [14]:
# Launch the training job under the chosen job name; the job's log stream is
# printed as this cell's output.
estimator.fit(inputs=fit_arguments, job_name=job_name)

2021-11-17 12:29:27 Starting - Starting the training job...
2021-11-17 12:29:46 Starting - Launching requested ML instancesProfilerReport-1637152154: InProgress
......
2021-11-17 12:30:57 Starting - Preparing the instances for training.........
2021-11-17 12:32:35 Downloading - Downloading input data...
2021-11-17 12:33:07 Training - Downloading the training image..........................[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-11-17 12:37:31,836 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-11-17 12:37:31,858 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-11-17 12:37:31,865 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-11-17 12:37:32,335 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/

[34mCollecting docker>=4.0.0
  Downloading docker-5.0.3-py2.py3-none-any.whl (146 kB)[0m
[34mCollecting gunicorn
  Downloading gunicorn-20.1.0-py3-none-any.whl (79 kB)[0m
[34mCollecting querystring-parser
  Downloading querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)[0m
[34mCollecting fsspec==2021.07.0
  Downloading fsspec-2021.7.0-py3-none-any.whl (118 kB)[0m
[34mCollecting aiobotocore>=1.0.1
  Downloading aiobotocore-2.0.0.tar.gz (52 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'[0m
[34mCollecting botocore<1.22.9,>=1.22.8
  Downloading botocore-1.22.8-py3-none-any.whl (8.1 MB)[0m
[34mCollecting aiohttp>=3.3.1
  Downloading aiohttp-3.8.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)[0m
[34mCollecting aioitertools>=0.5.1
  Downloading aioitertools-0.8.0-py3-none-any.whl (21 kB)[0m
[34mCollecting Mako
  Downloading Mako-1.1.5-py2.py3-none-any.whl 

[34m  Building wheel for idna-ssl (setup.py): finished with status 'done'
  Created wheel for idna-ssl: filename=idna_ssl-1.1.0-py3-none-any.whl size=3160 sha256=cd267a83a4bd7ce519d2a519514dc4d299a875bc144a8de243c312fe7dd01826
  Stored in directory: /root/.cache/pip/wheels/6a/f5/9c/f8331a854f7a8739cf0e74c13854e4dd7b1af11b04fe1dde13[0m
[34mSuccessfully built sagemaker aiobotocore alembic databricks-cli termcolor wrapt idna-ssl[0m
[34mInstalling collected packages: typing-extensions, six, pyasn1-modules, oauthlib, multidict, frozenlist, cachetools, yarl, smmap, requests-oauthlib, numpy, idna-ssl, google-auth, botocore, asynctest, async-timeout, aiosignal, wrapt, tqdm, tensorboard-plugin-wit, tensorboard-data-server, sqlalchemy, regex, python-editor, markdown, Mako, grpcio, google-auth-oauthlib, gitdb, fsspec, aioitertools, aiohttp, absl-py, torchmetrics, tokenizers, termcolor, tensorflow-estimator, tensorboard, sqlparse, sacremoses, querystring-parser, pyDeprecate, prometheus-flask-

[34m[2021-11-17 12:40:12.882 algo-1:80 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2021-11-17 12:40:12.920 algo-1:80 INFO profiler_config_parser.py:102] User has disabled profiler.[0m
[34m[2021-11-17 12:40:12.921 algo-1:80 INFO json_config.py:91] Creating hook from json_config at /opt/ml/input/config/debughookconfig.json.[0m
[34m[2021-11-17 12:40:12.921 algo-1:80 INFO hook.py:201] tensorboard_dir has not been set for the hook. SMDebug will not be exporting tensorboard summaries.[0m
[34m[2021-11-17 12:40:12.922 algo-1:80 INFO hook.py:255] Saving to /opt/ml/output/tensors[0m
[34m[2021-11-17 12:40:12.922 algo-1:80 INFO state_store.py:77] The checkpoint config file /opt/ml/input/config/checkpointconfig.json does not exist.[0m
[34m[2021-11-17 12:40:13.145 algo-1:80 INFO hook.py:594] name:model.l0.embeddings.word_embeddings.weight count_params:7813632[0m
[34m[2021-11-17 12:40:13.145 algo-1:80 INFO hook.py:594] name:model.l0.embeddings.position_embeddings.weig

[34m#015Validation sanity check: 0it [00:00, ?it/s]#015Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]#015                                                              #015#015Training: 99it [00:00, ?it/s]#015Training:   0%|          | 0/7586 [00:00<?, ?it/s]#015Epoch 0:   0%|          | 0/7586 [00:00<?, ?it/s] #015Epoch 0:   0%|          | 30/7586 [00:03<14:43,  8.56it/s]#015Epoch 0:   0%|          | 30/7586 [00:03<14:43,  8.56it/s, loss=2.28, v_num=0, val_loss_epoch=0.699, train_loss=1.740]#015Epoch 0:   1%|          | 60/7586 [00:06<14:28,  8.67it/s, loss=2.28, v_num=0, val_loss_epoch=0.699, train_loss=1.740]#015Epoch 0:   1%|          | 60/7586 [00:06<14:28,  8.67it/s, loss=1.8, v_num=0, val_loss_epoch=0.699, train_loss=1.320] #015Epoch 0:   1%|          | 90/7586 [00:10<14:13,  8.78it/s, loss=1.8, v_num=0, val_loss_epoch=0.699, train_loss=1.320]#015Epoch 0:   1%|          | 90/7586 [00:10<14:13,  8.78it/s, loss=1.87, v_num=0, val_loss_epoch=0.699, train_loss=2.110]

[34m<10:22,  9.01it/s, loss=1.18, v_num=0, val_loss_epoch=0.699, train_loss=1.570]#015Epoch 0:  26%|██▌       | 1980/7586 [03:39<10:22,  9.01it/s, loss=1.08, v_num=0, val_loss_epoch=0.699, train_loss=0.736]#015Epoch 0:  26%|██▋       | 2010/7586 [03:43<10:19,  9.01it/s, loss=1.08, v_num=0, val_loss_epoch=0.699, train_loss=0.736]#015Epoch 0:  26%|██▋       | 2010/7586 [03:43<10:19,  9.01it/s, loss=1.08, v_num=0, val_loss_epoch=0.699, train_loss=0.985]#015Epoch 0:  27%|██▋       | 2040/7586 [03:46<10:15,  9.01it/s, loss=1.08, v_num=0, val_loss_epoch=0.699, train_loss=0.985]#015Epoch 0:  27%|██▋       | 2040/7586 [03:46<10:15,  9.01it/s, loss=1.17, v_num=0, val_loss_epoch=0.699, train_loss=0.637]#015Epoch 0:  27%|██▋       | 2070/7586 [03:49<10:12,  9.01it/s, loss=1.17, v_num=0, val_loss_epoch=0.699, train_loss=0.637]#015Epoch 0:  27%|██▋       | 2070/7586 [03:49<10:12,  9.01it/s, loss=1.15, v_num=0, val_loss_epoch=0.699, train_loss=0.572]#015Epoch 0:  28%|██▊       | 2100/7586 [03:53<10

[34m816, v_num=0, val_loss_epoch=0.699, train_loss=0.634]#015Epoch 0:  51%|█████     | 3870/7586 [07:09<06:52,  9.01it/s, loss=0.997, v_num=0, val_loss_epoch=0.699, train_loss=0.795]#015Epoch 0:  51%|█████▏    | 3900/7586 [07:12<06:49,  9.01it/s, loss=0.997, v_num=0, val_loss_epoch=0.699, train_loss=0.795]#015Epoch 0:  51%|█████▏    | 3900/7586 [07:12<06:49,  9.01it/s, loss=0.986, v_num=0, val_loss_epoch=0.699, train_loss=1.150]#015Epoch 0:  52%|█████▏    | 3930/7586 [07:16<06:45,  9.01it/s, loss=0.986, v_num=0, val_loss_epoch=0.699, train_loss=1.150]#015Epoch 0:  52%|█████▏    | 3930/7586 [07:16<06:45,  9.01it/s, loss=0.832, v_num=0, val_loss_epoch=0.699, train_loss=1.080]#015Epoch 0:  52%|█████▏    | 3960/7586 [07:19<06:42,  9.01it/s, loss=0.832, v_num=0, val_loss_epoch=0.699, train_loss=1.080]#015Epoch 0:  52%|█████▏    | 3960/7586 [07:19<06:42,  9.01it/s, loss=0.794, v_num=0, val_loss_epoch=0.699, train_loss=0.726]#015Epoch 0:  53%|█████▎    | 3990/7586 [07:22<06:39,  9.01it/s, lo

[34mm=0, val_loss_epoch=0.699, train_loss=1.370]#015Epoch 0:  75%|███████▌  | 5700/7586 [10:32<03:29,  9.02it/s, loss=0.825, v_num=0, val_loss_epoch=0.699, train_loss=1.370]#015Epoch 0:  75%|███████▌  | 5700/7586 [10:32<03:29,  9.02it/s, loss=0.728, v_num=0, val_loss_epoch=0.699, train_loss=0.566]#015Epoch 0:  76%|███████▌  | 5730/7586 [10:35<03:25,  9.02it/s, loss=0.728, v_num=0, val_loss_epoch=0.699, train_loss=0.566]#015Epoch 0:  76%|███████▌  | 5730/7586 [10:35<03:25,  9.02it/s, loss=0.902, v_num=0, val_loss_epoch=0.699, train_loss=0.663]#015Epoch 0:  76%|███████▌  | 5760/7586 [10:38<03:22,  9.02it/s, loss=0.902, v_num=0, val_loss_epoch=0.699, train_loss=0.663]#015Epoch 0:  76%|███████▌  | 5760/7586 [10:38<03:22,  9.02it/s, loss=0.725, v_num=0, val_loss_epoch=0.699, train_loss=0.790]#015Epoch 0:  76%|███████▋  | 5790/7586 [10:42<03:19,  9.02it/s, loss=0.725, v_num=0, val_loss_epoch=0.699, train_loss=0.790]#015Epoch 0:  76%|███████▋  | 5790/7586 [10:42<03:19,  9.02it/s, loss=0.964,

[34m#015Validating:  14%|█▍        | 60/416 [00:03<00:22, 16.14it/s]#033[A#015Epoch 0:  96%|█████████▌| 7260/7586 [13:19<00:35,  9.08it/s, loss=0.813, v_num=0, val_loss_epoch=0.699, train_loss=0.382][0m
[34m#015Validating:  22%|██▏       | 90/416 [00:05<00:19, 16.49it/s]#033[A#015Epoch 0:  96%|█████████▌| 7290/7586 [13:21<00:32,  9.10it/s, loss=0.813, v_num=0, val_loss_epoch=0.699, train_loss=0.382][0m
[34m#015Validating:  29%|██▉       | 120/416 [00:07<00:17, 16.75it/s]#033[A#015Epoch 0:  96%|█████████▋| 7320/7586 [13:22<00:29,  9.12it/s, loss=0.813, v_num=0, val_loss_epoch=0.699, train_loss=0.382][0m
[34m#015Validating:  36%|███▌      | 150/416 [00:08<00:15, 16.92it/s]#033[A#015Epoch 0:  97%|█████████▋| 7350/7586 [13:24<00:25,  9.13it/s, loss=0.813, v_num=0, val_loss_epoch=0.699, train_loss=0.382][0m
[34m#015Validating:  43%|████▎     | 180/416 [00:10<00:13, 17.05it/s]#033[A#015Epoch 0:  97%|█████████▋| 7380/7586 [13:26<00:22,  9.15it/s, loss=0.813, v_num=0, val_loss_epoch=0

[34m7, train_loss=0.317, val_loss_step=0.0707]#015Epoch 1:  22%|██▏       | 1680/7586 [03:06<10:56,  8.99it/s, loss=0.658, v_num=0, val_loss_epoch=0.137, train_loss=0.317, val_loss_step=0.0707]#015Epoch 1:  22%|██▏       | 1680/7586 [03:06<10:56,  8.99it/s, loss=0.687, v_num=0, val_loss_epoch=0.137, train_loss=1.680, val_loss_step=0.0707]#015Epoch 1:  23%|██▎       | 1710/7586 [03:10<10:53,  9.00it/s, loss=0.687, v_num=0, val_loss_epoch=0.137, train_loss=1.680, val_loss_step=0.0707]#015Epoch 1:  23%|██▎       | 1710/7586 [03:10<10:53,  9.00it/s, loss=0.793, v_num=0, val_loss_epoch=0.137, train_loss=0.396, val_loss_step=0.0707]#015Epoch 1:  23%|██▎       | 1740/7586 [03:13<10:49,  9.00it/s, loss=0.793, v_num=0, val_loss_epoch=0.137, train_loss=0.396, val_loss_step=0.0707]#015Epoch 1:  23%|██▎       | 1740/7586 [03:13<10:49,  9.00it/s, loss=0.71, v_num=0, val_loss_epoch=0.137, train_loss=0.988, val_loss_step=0.0707] #015Epoch 1:  23%|██▎       | 1770/7586 [03:16<10:46,  9.00it/s, loss=0

[34m 44%|████▎     | 3300/7586 [06:06<07:55,  9.00it/s, loss=0.885, v_num=0, val_loss_epoch=0.137, train_loss=0.474, val_loss_step=0.0707]#015Epoch 1:  44%|████▎     | 3300/7586 [06:06<07:55,  9.00it/s, loss=0.813, v_num=0, val_loss_epoch=0.137, train_loss=0.358, val_loss_step=0.0707]#015Epoch 1:  44%|████▍     | 3330/7586 [06:09<07:52,  9.00it/s, loss=0.813, v_num=0, val_loss_epoch=0.137, train_loss=0.358, val_loss_step=0.0707]#015Epoch 1:  44%|████▍     | 3330/7586 [06:09<07:52,  9.00it/s, loss=0.748, v_num=0, val_loss_epoch=0.137, train_loss=1.430, val_loss_step=0.0707]#015Epoch 1:  44%|████▍     | 3360/7586 [06:13<07:49,  9.01it/s, loss=0.748, v_num=0, val_loss_epoch=0.137, train_loss=1.430, val_loss_step=0.0707]#015Epoch 1:  44%|████▍     | 3360/7586 [06:13<07:49,  9.01it/s, loss=0.562, v_num=0, val_loss_epoch=0.137, train_loss=0.663, val_loss_step=0.0707]#015Epoch 1:  45%|████▍     | 3390/7586 [06:16<07:45,  9.00it/s, loss=0.562, v_num=0, val_loss_epoch=0.137, train_loss=0.663, 

[34m/s, loss=0.543, v_num=0, val_loss_epoch=0.137, train_loss=0.801, val_loss_step=0.0707]#015Epoch 1:  64%|██████▍   | 4890/7586 [09:03<04:59,  9.00it/s, loss=0.543, v_num=0, val_loss_epoch=0.137, train_loss=0.801, val_loss_step=0.0707]#015Epoch 1:  64%|██████▍   | 4890/7586 [09:03<04:59,  9.00it/s, loss=0.62, v_num=0, val_loss_epoch=0.137, train_loss=0.525, val_loss_step=0.0707] #015Epoch 1:  65%|██████▍   | 4920/7586 [09:06<04:56,  9.00it/s, loss=0.62, v_num=0, val_loss_epoch=0.137, train_loss=0.525, val_loss_step=0.0707]#015Epoch 1:  65%|██████▍   | 4920/7586 [09:06<04:56,  9.00it/s, loss=0.641, v_num=0, val_loss_epoch=0.137, train_loss=1.030, val_loss_step=0.0707]#015Epoch 1:  65%|██████▌   | 4950/7586 [09:09<04:52,  9.00it/s, loss=0.641, v_num=0, val_loss_epoch=0.137, train_loss=1.030, val_loss_step=0.0707]#015Epoch 1:  65%|██████▌   | 4950/7586 [09:09<04:52,  9.00it/s, loss=0.805, v_num=0, val_loss_epoch=0.137, train_loss=1.600, val_loss_step=0.0707]#015Epoch 1:  66%|██████▌   

[34m██▍ | 6420/7586 [11:52<02:09,  9.00it/s, loss=0.58, v_num=0, val_loss_epoch=0.137, train_loss=0.294, val_loss_step=0.0707]#015Epoch 1:  85%|████████▍ | 6420/7586 [11:52<02:09,  9.00it/s, loss=0.831, v_num=0, val_loss_epoch=0.137, train_loss=0.406, val_loss_step=0.0707]#015Epoch 1:  85%|████████▌ | 6450/7586 [11:56<02:06,  9.00it/s, loss=0.831, v_num=0, val_loss_epoch=0.137, train_loss=0.406, val_loss_step=0.0707]#015Epoch 1:  85%|████████▌ | 6450/7586 [11:56<02:06,  9.00it/s, loss=0.605, v_num=0, val_loss_epoch=0.137, train_loss=0.516, val_loss_step=0.0707]#015Epoch 1:  85%|████████▌ | 6480/7586 [11:59<02:02,  9.00it/s, loss=0.605, v_num=0, val_loss_epoch=0.137, train_loss=0.516, val_loss_step=0.0707]#015Epoch 1:  85%|████████▌ | 6480/7586 [11:59<02:02,  9.00it/s, loss=0.729, v_num=0, val_loss_epoch=0.137, train_loss=0.508, val_loss_step=0.0707]#015Epoch 1:  86%|████████▌ | 6510/7586 [12:03<01:59,  9.00it/s, loss=0.729, v_num=0, val_loss_epoch=0.137, train_loss=0.508, val_loss_ste

[34m#015Validating:  36%|███▌      | 150/416 [00:08<00:15, 16.87it/s]#033[A#015Epoch 1:  97%|█████████▋| 7350/7586 [13:25<00:25,  9.12it/s, loss=0.68, v_num=0, val_loss_epoch=0.137, train_loss=1.090, val_loss_step=0.0707][0m
[34m#015Validating:  43%|████▎     | 180/416 [00:10<00:13, 16.97it/s]#033[A#015Epoch 1:  97%|█████████▋| 7380/7586 [13:27<00:22,  9.14it/s, loss=0.68, v_num=0, val_loss_epoch=0.137, train_loss=1.090, val_loss_step=0.0707][0m
[34m#015Validating:  50%|█████     | 210/416 [00:12<00:12, 17.07it/s]#033[A#015Epoch 1:  98%|█████████▊| 7410/7586 [13:29<00:19,  9.16it/s, loss=0.68, v_num=0, val_loss_epoch=0.137, train_loss=1.090, val_loss_step=0.0707][0m
[34m#015Validating:  58%|█████▊    | 240/416 [00:14<00:10, 17.16it/s]#033[A#015Epoch 1:  98%|█████████▊| 7440/7586 [13:30<00:15,  9.18it/s, loss=0.68, v_num=0, val_loss_epoch=0.137, train_loss=1.090, val_loss_step=0.0707][0m
[34m#015Validating:  65%|██████▍   | 270/416 [00:15<00:08, 17.22it/s]#033[A#015Epoch 1:  98

[34m0.126, train_loss=1.430, val_loss_step=0.0917] #015Epoch 2:  22%|██▏       | 1680/7586 [03:06<10:56,  8.99it/s, loss=0.62, v_num=0, val_loss_epoch=0.126, train_loss=1.430, val_loss_step=0.0917]#015Epoch 2:  22%|██▏       | 1680/7586 [03:06<10:56,  8.99it/s, loss=0.622, v_num=0, val_loss_epoch=0.126, train_loss=1.690, val_loss_step=0.0917]#015Epoch 2:  23%|██▎       | 1710/7586 [03:10<10:53,  8.99it/s, loss=0.622, v_num=0, val_loss_epoch=0.126, train_loss=1.690, val_loss_step=0.0917]#015Epoch 2:  23%|██▎       | 1710/7586 [03:10<10:53,  8.99it/s, loss=0.589, v_num=0, val_loss_epoch=0.126, train_loss=0.537, val_loss_step=0.0917]#015Epoch 2:  23%|██▎       | 1740/7586 [03:13<10:49,  8.99it/s, loss=0.589, v_num=0, val_loss_epoch=0.126, train_loss=0.537, val_loss_step=0.0917]#015Epoch 2:  23%|██▎       | 1740/7586 [03:13<10:49,  8.99it/s, loss=0.573, v_num=0, val_loss_epoch=0.126, train_loss=0.636, val_loss_step=0.0917]#015Epoch 2:  23%|██▎       | 1770/7586 [03:16<10:46,  8.99it/s, lo

[34m  44%|████▎     | 3300/7586 [06:06<07:56,  9.00it/s, loss=0.545, v_num=0, val_loss_epoch=0.126, train_loss=0.472, val_loss_step=0.0917]#015Epoch 2:  44%|████▎     | 3300/7586 [06:06<07:56,  9.00it/s, loss=0.763, v_num=0, val_loss_epoch=0.126, train_loss=1.250, val_loss_step=0.0917]#015Epoch 2:  44%|████▍     | 3330/7586 [06:09<07:52,  9.00it/s, loss=0.763, v_num=0, val_loss_epoch=0.126, train_loss=1.250, val_loss_step=0.0917]#015Epoch 2:  44%|████▍     | 3330/7586 [06:09<07:52,  9.00it/s, loss=0.74, v_num=0, val_loss_epoch=0.126, train_loss=0.793, val_loss_step=0.0917] #015Epoch 2:  44%|████▍     | 3360/7586 [06:13<07:49,  9.00it/s, loss=0.74, v_num=0, val_loss_epoch=0.126, train_loss=0.793, val_loss_step=0.0917]#015Epoch 2:  44%|████▍     | 3360/7586 [06:13<07:49,  9.00it/s, loss=0.724, v_num=0, val_loss_epoch=0.126, train_loss=0.817, val_loss_step=0.0917]#015Epoch 2:  45%|████▍     | 3390/7586 [06:16<07:46,  9.00it/s, loss=0.724, v_num=0, val_loss_epoch=0.126, train_loss=0.817, 

[34m0it/s, loss=0.513, v_num=0, val_loss_epoch=0.126, train_loss=0.353, val_loss_step=0.0917]#015Epoch 2:  64%|██████▍   | 4890/7586 [09:03<04:59,  9.00it/s, loss=0.513, v_num=0, val_loss_epoch=0.126, train_loss=0.353, val_loss_step=0.0917]#015Epoch 2:  64%|██████▍   | 4890/7586 [09:03<04:59,  9.00it/s, loss=0.695, v_num=0, val_loss_epoch=0.126, train_loss=0.766, val_loss_step=0.0917]#015Epoch 2:  65%|██████▍   | 4920/7586 [09:06<04:56,  9.00it/s, loss=0.695, v_num=0, val_loss_epoch=0.126, train_loss=0.766, val_loss_step=0.0917]#015Epoch 2:  65%|██████▍   | 4920/7586 [09:06<04:56,  9.00it/s, loss=0.679, v_num=0, val_loss_epoch=0.126, train_loss=0.313, val_loss_step=0.0917]#015Epoch 2:  65%|██████▌   | 4950/7586 [09:09<04:52,  9.01it/s, loss=0.679, v_num=0, val_loss_epoch=0.126, train_loss=0.313, val_loss_step=0.0917]#015Epoch 2:  65%|██████▌   | 4950/7586 [09:09<04:52,  9.01it/s, loss=0.687, v_num=0, val_loss_epoch=0.126, train_loss=0.890, val_loss_step=0.0917]#015Epoch 2:  66%|██████

[34m��█████▍ | 6420/7586 [11:52<02:09,  9.01it/s, loss=0.556, v_num=0, val_loss_epoch=0.126, train_loss=0.491, val_loss_step=0.0917]#015Epoch 2:  85%|████████▍ | 6420/7586 [11:52<02:09,  9.01it/s, loss=0.64, v_num=0, val_loss_epoch=0.126, train_loss=0.842, val_loss_step=0.0917] #015Epoch 2:  85%|████████▌ | 6450/7586 [11:56<02:06,  9.01it/s, loss=0.64, v_num=0, val_loss_epoch=0.126, train_loss=0.842, val_loss_step=0.0917]#015Epoch 2:  85%|████████▌ | 6450/7586 [11:56<02:06,  9.01it/s, loss=0.527, v_num=0, val_loss_epoch=0.126, train_loss=0.352, val_loss_step=0.0917]#015Epoch 2:  85%|████████▌ | 6480/7586 [11:59<02:02,  9.01it/s, loss=0.527, v_num=0, val_loss_epoch=0.126, train_loss=0.352, val_loss_step=0.0917]#015Epoch 2:  85%|████████▌ | 6480/7586 [11:59<02:02,  9.01it/s, loss=0.612, v_num=0, val_loss_epoch=0.126, train_loss=0.609, val_loss_step=0.0917]#015Epoch 2:  86%|████████▌ | 6510/7586 [12:02<01:59,  9.01it/s, loss=0.612, v_num=0, val_loss_epoch=0.126, train_loss=0.609, val_los

[34m#015Validating:  58%|█████▊    | 240/416 [00:14<00:10, 17.20it/s]#033[A#015Epoch 2:  98%|█████████▊| 7440/7586 [13:30<00:15,  9.18it/s, loss=0.649, v_num=0, val_loss_epoch=0.126, train_loss=1.400, val_loss_step=0.0917][0m
[34m#015Validating:  65%|██████▍   | 270/416 [00:15<00:08, 17.26it/s]#033[A#015Epoch 2:  98%|█████████▊| 7470/7586 [13:32<00:12,  9.20it/s, loss=0.649, v_num=0, val_loss_epoch=0.126, train_loss=1.400, val_loss_step=0.0917][0m
[34m#015Validating:  72%|███████▏  | 300/416 [00:17<00:06, 17.28it/s]#033[A#015Epoch 2:  99%|█████████▉| 7500/7586 [13:34<00:09,  9.21it/s, loss=0.649, v_num=0, val_loss_epoch=0.126, train_loss=1.400, val_loss_step=0.0917][0m
[34m#015Validating:  79%|███████▉  | 330/416 [00:19<00:04, 17.29it/s]#033[A#015Epoch 2:  99%|█████████▉| 7530/7586 [13:35<00:06,  9.23it/s, loss=0.649, v_num=0, val_loss_epoch=0.126, train_loss=1.400, val_loss_step=0.0917][0m
[34m#015Validating:  87%|████████▋ | 360/416 [00:20<00:03, 17.31it/s]#033[A#015Epoch 2:

[34m120, train_loss=0.223, val_loss_step=0.0875] #015Epoch 3:  22%|██▏       | 1680/7586 [03:06<10:55,  9.01it/s, loss=0.43, v_num=0, val_loss_epoch=0.120, train_loss=0.223, val_loss_step=0.0875]#015Epoch 3:  22%|██▏       | 1680/7586 [03:06<10:55,  9.01it/s, loss=0.539, v_num=0, val_loss_epoch=0.120, train_loss=0.311, val_loss_step=0.0875]#015Epoch 3:  23%|██▎       | 1710/7586 [03:09<10:52,  9.00it/s, loss=0.539, v_num=0, val_loss_epoch=0.120, train_loss=0.311, val_loss_step=0.0875]#015Epoch 3:  23%|██▎       | 1710/7586 [03:09<10:52,  9.00it/s, loss=0.438, v_num=0, val_loss_epoch=0.120, train_loss=1.120, val_loss_step=0.0875]#015Epoch 3:  23%|██▎       | 1740/7586 [03:13<10:49,  9.00it/s, loss=0.438, v_num=0, val_loss_epoch=0.120, train_loss=1.120, val_loss_step=0.0875]#015Epoch 3:  23%|██▎       | 1740/7586 [03:13<10:49,  9.00it/s, loss=0.618, v_num=0, val_loss_epoch=0.120, train_loss=0.273, val_loss_step=0.0875]#015Epoch 3:  23%|██▎       | 1770/7586 [03:16<10:45,  9.01it/s, loss

[34moch 3:  44%|████▎     | 3300/7586 [06:06<07:55,  9.01it/s, loss=0.503, v_num=0, val_loss_epoch=0.120, train_loss=1.030, val_loss_step=0.0875]#015Epoch 3:  44%|████▎     | 3300/7586 [06:06<07:55,  9.01it/s, loss=0.545, v_num=0, val_loss_epoch=0.120, train_loss=0.297, val_loss_step=0.0875]#015Epoch 3:  44%|████▍     | 3330/7586 [06:09<07:52,  9.01it/s, loss=0.545, v_num=0, val_loss_epoch=0.120, train_loss=0.297, val_loss_step=0.0875]#015Epoch 3:  44%|████▍     | 3330/7586 [06:09<07:52,  9.01it/s, loss=0.556, v_num=0, val_loss_epoch=0.120, train_loss=0.627, val_loss_step=0.0875]#015Epoch 3:  44%|████▍     | 3360/7586 [06:12<07:48,  9.01it/s, loss=0.556, v_num=0, val_loss_epoch=0.120, train_loss=0.627, val_loss_step=0.0875]#015Epoch 3:  44%|████▍     | 3360/7586 [06:12<07:48,  9.01it/s, loss=0.657, v_num=0, val_loss_epoch=0.120, train_loss=1.990, val_loss_step=0.0875]#015Epoch 3:  45%|████▍     | 3390/7586 [06:16<07:45,  9.01it/s, loss=0.657, v_num=0, val_loss_epoch=0.120, train_loss=

[34m  9.01it/s, loss=0.466, v_num=0, val_loss_epoch=0.120, train_loss=0.228, val_loss_step=0.0875]#015Epoch 3:  64%|██████▍   | 4890/7586 [09:02<04:59,  9.01it/s, loss=0.466, v_num=0, val_loss_epoch=0.120, train_loss=0.228, val_loss_step=0.0875]#015Epoch 3:  64%|██████▍   | 4890/7586 [09:02<04:59,  9.01it/s, loss=0.441, v_num=0, val_loss_epoch=0.120, train_loss=0.390, val_loss_step=0.0875]#015Epoch 3:  65%|██████▍   | 4920/7586 [09:06<04:55,  9.01it/s, loss=0.441, v_num=0, val_loss_epoch=0.120, train_loss=0.390, val_loss_step=0.0875]#015Epoch 3:  65%|██████▍   | 4920/7586 [09:06<04:55,  9.01it/s, loss=0.556, v_num=0, val_loss_epoch=0.120, train_loss=0.383, val_loss_step=0.0875]#015Epoch 3:  65%|██████▌   | 4950/7586 [09:09<04:52,  9.01it/s, loss=0.556, v_num=0, val_loss_epoch=0.120, train_loss=0.383, val_loss_step=0.0875]#015Epoch 3:  65%|██████▌   | 4950/7586 [09:09<04:52,  9.01it/s, loss=0.436, v_num=0, val_loss_epoch=0.120, train_loss=0.724, val_loss_step=0.0875]#015Epoch 3:  66%|█

[34m████████▍ | 6420/7586 [11:52<02:09,  9.01it/s, loss=0.402, v_num=0, val_loss_epoch=0.120, train_loss=0.474, val_loss_step=0.0875]#015Epoch 3:  85%|████████▍ | 6420/7586 [11:52<02:09,  9.01it/s, loss=0.445, v_num=0, val_loss_epoch=0.120, train_loss=0.284, val_loss_step=0.0875]#015Epoch 3:  85%|████████▌ | 6450/7586 [11:55<02:06,  9.01it/s, loss=0.445, v_num=0, val_loss_epoch=0.120, train_loss=0.284, val_loss_step=0.0875]#015Epoch 3:  85%|████████▌ | 6450/7586 [11:55<02:06,  9.01it/s, loss=0.489, v_num=0, val_loss_epoch=0.120, train_loss=0.164, val_loss_step=0.0875]#015Epoch 3:  85%|████████▌ | 6480/7586 [11:59<02:02,  9.01it/s, loss=0.489, v_num=0, val_loss_epoch=0.120, train_loss=0.164, val_loss_step=0.0875]#015Epoch 3:  85%|████████▌ | 6480/7586 [11:59<02:02,  9.01it/s, loss=0.519, v_num=0, val_loss_epoch=0.120, train_loss=0.596, val_loss_step=0.0875]#015Epoch 3:  86%|████████▌ | 6510/7586 [12:02<01:59,  9.01it/s, loss=0.519, v_num=0, val_loss_epoch=0.120, train_loss=0.596, val_l

[34m#015Validating:  58%|█████▊    | 240/416 [00:13<00:10, 17.23it/s]#033[A#015Epoch 3:  98%|█████████▊| 7440/7586 [13:29<00:15,  9.19it/s, loss=0.668, v_num=0, val_loss_epoch=0.120, train_loss=1.850, val_loss_step=0.0875][0m
[34m#015Validating:  65%|██████▍   | 270/416 [00:15<00:08, 17.25it/s]#033[A#015Epoch 3:  98%|█████████▊| 7470/7586 [13:31<00:12,  9.20it/s, loss=0.668, v_num=0, val_loss_epoch=0.120, train_loss=1.850, val_loss_step=0.0875][0m
[34m#015Validating:  72%|███████▏  | 300/416 [00:17<00:06, 17.29it/s]#033[A#015Epoch 3:  99%|█████████▉| 7500/7586 [13:33<00:09,  9.22it/s, loss=0.668, v_num=0, val_loss_epoch=0.120, train_loss=1.850, val_loss_step=0.0875][0m
[34m#015Validating:  79%|███████▉  | 330/416 [00:19<00:04, 17.29it/s]#033[A#015Epoch 3:  99%|█████████▉| 7530/7586 [13:35<00:06,  9.24it/s, loss=0.668, v_num=0, val_loss_epoch=0.120, train_loss=1.850, val_loss_step=0.0875][0m
[34m#015Validating:  87%|████████▋ | 360/416 [00:20<00:03, 17.34it/s]#033[A#015Epoch 3:

[34m1.11, v_num=1, val_loss_epoch=0.677, train_loss=0.757]#015Epoch 0:  26%|██▌       | 1950/7585 [03:35<10:23,  9.04it/s, loss=1.07, v_num=1, val_loss_epoch=0.677, train_loss=1.210]#015Epoch 0:  26%|██▌       | 1980/7585 [03:38<10:19,  9.05it/s, loss=1.07, v_num=1, val_loss_epoch=0.677, train_loss=1.210]#015Epoch 0:  26%|██▌       | 1980/7585 [03:38<10:19,  9.05it/s, loss=1.1, v_num=1, val_loss_epoch=0.677, train_loss=1.030] #015Epoch 0:  26%|██▋       | 2010/7585 [03:42<10:16,  9.05it/s, loss=1.1, v_num=1, val_loss_epoch=0.677, train_loss=1.030]#015Epoch 0:  26%|██▋       | 2010/7585 [03:42<10:16,  9.05it/s, loss=1.18, v_num=1, val_loss_epoch=0.677, train_loss=1.770]#015Epoch 0:  27%|██▋       | 2040/7585 [03:45<10:12,  9.05it/s, loss=1.18, v_num=1, val_loss_epoch=0.677, train_loss=1.770]#015Epoch 0:  27%|██▋       | 2040/7585 [03:45<10:12,  9.05it/s, loss=1.35, v_num=1, val_loss_epoch=0.677, train_loss=0.848]#015Epoch 0:  27%|██▋       | 2070/7585 [03:48<10:09,  9.05it/s, loss=1.35

[34m51%|█████     | 3840/7585 [07:03<06:53,  9.06it/s, loss=1.09, v_num=1, val_loss_epoch=0.677, train_loss=0.613]#015Epoch 0:  51%|█████     | 3870/7585 [07:07<06:49,  9.06it/s, loss=1.09, v_num=1, val_loss_epoch=0.677, train_loss=0.613]#015Epoch 0:  51%|█████     | 3870/7585 [07:07<06:49,  9.06it/s, loss=0.906, v_num=1, val_loss_epoch=0.677, train_loss=0.585]#015Epoch 0:  51%|█████▏    | 3900/7585 [07:10<06:46,  9.06it/s, loss=0.906, v_num=1, val_loss_epoch=0.677, train_loss=0.585]#015Epoch 0:  51%|█████▏    | 3900/7585 [07:10<06:46,  9.06it/s, loss=1.11, v_num=1, val_loss_epoch=0.677, train_loss=1.520] #015Epoch 0:  52%|█████▏    | 3930/7585 [07:13<06:43,  9.06it/s, loss=1.11, v_num=1, val_loss_epoch=0.677, train_loss=1.520]#015Epoch 0:  52%|█████▏    | 3930/7585 [07:13<06:43,  9.06it/s, loss=1.02, v_num=1, val_loss_epoch=0.677, train_loss=0.613]#015Epoch 0:  52%|█████▏    | 3960/7585 [07:17<06:40,  9.06it/s, loss=1.02, v_num=1, val_loss_epoch=0.677, train_loss=0.613]#015Epoch 0:  

[34m:25<03:31,  9.07it/s, loss=0.793, v_num=1, val_loss_epoch=0.677, train_loss=0.358]#015Epoch 0:  75%|███████▍  | 5670/7585 [10:25<03:31,  9.07it/s, loss=0.821, v_num=1, val_loss_epoch=0.677, train_loss=1.010]#015Epoch 0:  75%|███████▌  | 5700/7585 [10:28<03:27,  9.07it/s, loss=0.821, v_num=1, val_loss_epoch=0.677, train_loss=1.010]#015Epoch 0:  75%|███████▌  | 5700/7585 [10:28<03:27,  9.07it/s, loss=0.895, v_num=1, val_loss_epoch=0.677, train_loss=0.645]#015Epoch 0:  76%|███████▌  | 5730/7585 [10:31<03:24,  9.07it/s, loss=0.895, v_num=1, val_loss_epoch=0.677, train_loss=0.645]#015Epoch 0:  76%|███████▌  | 5730/7585 [10:31<03:24,  9.07it/s, loss=1.05, v_num=1, val_loss_epoch=0.677, train_loss=0.401] #015Epoch 0:  76%|███████▌  | 5760/7585 [10:35<03:21,  9.07it/s, loss=1.05, v_num=1, val_loss_epoch=0.677, train_loss=0.401]#015Epoch 0:  76%|███████▌  | 5760/7585 [10:35<03:21,  9.07it/s, loss=0.864, v_num=1, val_loss_epoch=0.677, train_loss=1.550]#015Epoch 0:  76%|███████▋  | 5790/7585

[34m#015Validating:   7%|▋         | 30/415 [00:02<00:36, 10.52it/s]#033[A#015Epoch 0:  95%|█████████▌| 7230/7585 [13:14<00:38,  9.11it/s, loss=0.921, v_num=1, val_loss_epoch=0.677, train_loss=1.760][0m
[34m#015Validating:  14%|█▍        | 60/415 [00:05<00:33, 10.49it/s]#033[A#015Epoch 0:  96%|█████████▌| 7260/7585 [13:16<00:35,  9.11it/s, loss=0.921, v_num=1, val_loss_epoch=0.677, train_loss=1.760][0m
[34m#015Validating:  22%|██▏       | 90/415 [00:08<00:31, 10.46it/s]#033[A#015Epoch 0:  96%|█████████▌| 7290/7585 [13:19<00:32,  9.11it/s, loss=0.921, v_num=1, val_loss_epoch=0.677, train_loss=1.760][0m
[34m#015Validating:  29%|██▉       | 120/415 [00:11<00:27, 10.59it/s]#033[A#015Epoch 0:  97%|█████████▋| 7320/7585 [13:22<00:29,  9.12it/s, loss=0.921, v_num=1, val_loss_epoch=0.677, train_loss=1.760][0m
[34m#015Validating:  36%|███▌      | 150/415 [00:14<00:24, 10.65it/s]#033[A#015Epoch 0:  97%|█████████▋| 7350/7585 [13:25<00:25,  9.13it/s, loss=0.921, v_num=1, val_loss_epoch=0.

[34m25, v_num=1, val_loss_epoch=0.143, train_loss=1.170, val_loss_step=0.122]#015Epoch 1:  22%|██▏       | 1680/7585 [03:05<10:52,  9.05it/s, loss=0.661, v_num=1, val_loss_epoch=0.143, train_loss=0.487, val_loss_step=0.122]#015Epoch 1:  23%|██▎       | 1710/7585 [03:08<10:49,  9.05it/s, loss=0.661, v_num=1, val_loss_epoch=0.143, train_loss=0.487, val_loss_step=0.122]#015Epoch 1:  23%|██▎       | 1710/7585 [03:08<10:49,  9.05it/s, loss=0.713, v_num=1, val_loss_epoch=0.143, train_loss=0.471, val_loss_step=0.122]#015Epoch 1:  23%|██▎       | 1740/7585 [03:12<10:45,  9.05it/s, loss=0.713, v_num=1, val_loss_epoch=0.143, train_loss=0.471, val_loss_step=0.122]#015Epoch 1:  23%|██▎       | 1740/7585 [03:12<10:45,  9.05it/s, loss=0.842, v_num=1, val_loss_epoch=0.143, train_loss=0.308, val_loss_step=0.122]#015Epoch 1:  23%|██▎       | 1770/7585 [03:15<10:42,  9.05it/s, loss=0.842, v_num=1, val_loss_epoch=0.143, train_loss=0.308, val_loss_step=0.122]#015Epoch 1:  23%|██▎       | 1770/7585 [03:15

[34m_num=1, val_loss_epoch=0.143, train_loss=0.600, val_loss_step=0.122]#015Epoch 1:  44%|████▍     | 3330/7585 [06:07<07:49,  9.06it/s, loss=0.711, v_num=1, val_loss_epoch=0.143, train_loss=0.600, val_loss_step=0.122]#015Epoch 1:  44%|████▍     | 3330/7585 [06:07<07:49,  9.06it/s, loss=0.825, v_num=1, val_loss_epoch=0.143, train_loss=0.508, val_loss_step=0.122]#015Epoch 1:  44%|████▍     | 3360/7585 [06:10<07:46,  9.06it/s, loss=0.825, v_num=1, val_loss_epoch=0.143, train_loss=0.508, val_loss_step=0.122]#015Epoch 1:  44%|████▍     | 3360/7585 [06:10<07:46,  9.06it/s, loss=0.79, v_num=1, val_loss_epoch=0.143, train_loss=0.491, val_loss_step=0.122] #015Epoch 1:  45%|████▍     | 3390/7585 [06:14<07:43,  9.06it/s, loss=0.79, v_num=1, val_loss_epoch=0.143, train_loss=0.491, val_loss_step=0.122]#015Epoch 1:  45%|████▍     | 3390/7585 [06:14<07:43,  9.06it/s, loss=0.729, v_num=1, val_loss_epoch=0.143, train_loss=0.639, val_loss_step=0.122]#015Epoch 1:  45%|████▌     | 3420/7585 [06:17<07:39

[34m, val_loss_epoch=0.143, train_loss=0.422, val_loss_step=0.122]#015Epoch 1:  65%|██████▍   | 4920/7585 [09:03<04:54,  9.06it/s, loss=0.693, v_num=1, val_loss_epoch=0.143, train_loss=0.422, val_loss_step=0.122]#015Epoch 1:  65%|██████▍   | 4920/7585 [09:03<04:54,  9.06it/s, loss=0.782, v_num=1, val_loss_epoch=0.143, train_loss=0.723, val_loss_step=0.122]#015Epoch 1:  65%|██████▌   | 4950/7585 [09:06<04:50,  9.06it/s, loss=0.782, v_num=1, val_loss_epoch=0.143, train_loss=0.723, val_loss_step=0.122]#015Epoch 1:  65%|██████▌   | 4950/7585 [09:06<04:50,  9.06it/s, loss=0.649, v_num=1, val_loss_epoch=0.143, train_loss=0.319, val_loss_step=0.122]#015Epoch 1:  66%|██████▌   | 4980/7585 [09:09<04:47,  9.06it/s, loss=0.649, v_num=1, val_loss_epoch=0.143, train_loss=0.319, val_loss_step=0.122]#015Epoch 1:  66%|██████▌   | 4980/7585 [09:09<04:47,  9.06it/s, loss=0.642, v_num=1, val_loss_epoch=0.143, train_loss=0.559, val_loss_step=0.122]#015Epoch 1:  66%|██████▌   | 5010/7585 [09:13<04:44,  9.

[34mal_loss_step=0.122]#015Epoch 1:  85%|████████▌ | 6450/7585 [11:51<02:05,  9.06it/s, loss=0.872, v_num=1, val_loss_epoch=0.143, train_loss=0.438, val_loss_step=0.122]#015Epoch 1:  85%|████████▌ | 6480/7585 [11:55<02:01,  9.06it/s, loss=0.872, v_num=1, val_loss_epoch=0.143, train_loss=0.438, val_loss_step=0.122]#015Epoch 1:  85%|████████▌ | 6480/7585 [11:55<02:01,  9.06it/s, loss=0.819, v_num=1, val_loss_epoch=0.143, train_loss=0.489, val_loss_step=0.122]#015Epoch 1:  86%|████████▌ | 6510/7585 [11:58<01:58,  9.06it/s, loss=0.819, v_num=1, val_loss_epoch=0.143, train_loss=0.489, val_loss_step=0.122]#015Epoch 1:  86%|████████▌ | 6510/7585 [11:58<01:58,  9.06it/s, loss=0.721, v_num=1, val_loss_epoch=0.143, train_loss=0.571, val_loss_step=0.122]#015Epoch 1:  86%|████████▌ | 6540/7585 [12:01<01:55,  9.06it/s, loss=0.721, v_num=1, val_loss_epoch=0.143, train_loss=0.571, val_loss_step=0.122]#015Epoch 1:  86%|████████▌ | 6540/7585 [12:01<01:55,  9.06it/s, loss=0.684, v_num=1, val_loss_epoch

[34m#015Validating:  36%|███▌      | 150/415 [00:14<00:24, 10.73it/s]#033[A#015Epoch 1:  97%|█████████▋| 7350/7585 [13:25<00:25,  9.12it/s, loss=0.682, v_num=1, val_loss_epoch=0.143, train_loss=0.581, val_loss_step=0.122][0m
[34m#015Validating:  43%|████▎     | 180/415 [00:16<00:22, 10.64it/s]#033[A#015Epoch 1:  97%|█████████▋| 7380/7585 [13:28<00:22,  9.13it/s, loss=0.682, v_num=1, val_loss_epoch=0.143, train_loss=0.581, val_loss_step=0.122][0m
[34m#015Validating:  51%|█████     | 210/415 [00:19<00:19, 10.50it/s]#033[A#015Epoch 1:  98%|█████████▊| 7410/7585 [13:31<00:19,  9.13it/s, loss=0.682, v_num=1, val_loss_epoch=0.143, train_loss=0.581, val_loss_step=0.122][0m
[34m#015Validating:  58%|█████▊    | 240/415 [00:22<00:16, 10.43it/s]#033[A#015Epoch 1:  98%|█████████▊| 7440/7585 [13:34<00:15,  9.14it/s, loss=0.682, v_num=1, val_loss_epoch=0.143, train_loss=0.581, val_loss_step=0.122][0m
[34m#015Validating:  65%|██████▌   | 270/415 [00:25<00:14, 10.35it/s]#033[A#015Epoch 1:  98

[34m=0.557, v_num=1, val_loss_epoch=0.127, train_loss=0.302, val_loss_step=0.118]#015Epoch 2:  22%|██▏       | 1680/7585 [03:06<10:54,  9.03it/s, loss=0.667, v_num=1, val_loss_epoch=0.127, train_loss=0.464, val_loss_step=0.118]#015Epoch 2:  23%|██▎       | 1710/7585 [03:09<10:51,  9.02it/s, loss=0.667, v_num=1, val_loss_epoch=0.127, train_loss=0.464, val_loss_step=0.118]#015Epoch 2:  23%|██▎       | 1710/7585 [03:09<10:51,  9.02it/s, loss=0.546, v_num=1, val_loss_epoch=0.127, train_loss=1.080, val_loss_step=0.118]#015Epoch 2:  23%|██▎       | 1740/7585 [03:12<10:47,  9.03it/s, loss=0.546, v_num=1, val_loss_epoch=0.127, train_loss=1.080, val_loss_step=0.118]#015Epoch 2:  23%|██▎       | 1740/7585 [03:12<10:47,  9.03it/s, loss=0.67, v_num=1, val_loss_epoch=0.127, train_loss=0.661, val_loss_step=0.118] #015Epoch 2:  23%|██▎       | 1770/7585 [03:16<10:44,  9.03it/s, loss=0.67, v_num=1, val_loss_epoch=0.127, train_loss=0.661, val_loss_step=0.118]#015Epoch 2:  23%|██▎       | 1770/7585 [03

[34m.611, v_num=1, val_loss_epoch=0.127, train_loss=0.341, val_loss_step=0.118]#015Epoch 2:  44%|████▍     | 3330/7585 [06:08<07:50,  9.04it/s, loss=0.611, v_num=1, val_loss_epoch=0.127, train_loss=0.341, val_loss_step=0.118]#015Epoch 2:  44%|████▍     | 3330/7585 [06:08<07:50,  9.04it/s, loss=0.503, v_num=1, val_loss_epoch=0.127, train_loss=0.619, val_loss_step=0.118]#015Epoch 2:  44%|████▍     | 3360/7585 [06:11<07:47,  9.04it/s, loss=0.503, v_num=1, val_loss_epoch=0.127, train_loss=0.619, val_loss_step=0.118]#015Epoch 2:  44%|████▍     | 3360/7585 [06:11<07:47,  9.04it/s, loss=0.513, v_num=1, val_loss_epoch=0.127, train_loss=1.290, val_loss_step=0.118]#015Epoch 2:  45%|████▍     | 3390/7585 [06:14<07:43,  9.04it/s, loss=0.513, v_num=1, val_loss_epoch=0.127, train_loss=1.290, val_loss_step=0.118]#015Epoch 2:  45%|████▍     | 3390/7585 [06:14<07:43,  9.04it/s, loss=0.617, v_num=1, val_loss_epoch=0.127, train_loss=0.182, val_loss_step=0.118]#015Epoch 2:  45%|████▌     | 3420/7585 [06:

[34m644, v_num=1, val_loss_epoch=0.127, train_loss=0.498, val_loss_step=0.118]#015Epoch 2:  65%|██████▍   | 4920/7585 [09:03<04:54,  9.05it/s, loss=0.644, v_num=1, val_loss_epoch=0.127, train_loss=0.498, val_loss_step=0.118]#015Epoch 2:  65%|██████▍   | 4920/7585 [09:03<04:54,  9.05it/s, loss=0.613, v_num=1, val_loss_epoch=0.127, train_loss=0.850, val_loss_step=0.118]#015Epoch 2:  65%|██████▌   | 4950/7585 [09:07<04:51,  9.05it/s, loss=0.613, v_num=1, val_loss_epoch=0.127, train_loss=0.850, val_loss_step=0.118]#015Epoch 2:  65%|██████▌   | 4950/7585 [09:07<04:51,  9.05it/s, loss=0.658, v_num=1, val_loss_epoch=0.127, train_loss=0.598, val_loss_step=0.118]#015Epoch 2:  66%|██████▌   | 4980/7585 [09:10<04:47,  9.05it/s, loss=0.658, v_num=1, val_loss_epoch=0.127, train_loss=0.598, val_loss_step=0.118]#015Epoch 2:  66%|██████▌   | 4980/7585 [09:10<04:47,  9.05it/s, loss=0.537, v_num=1, val_loss_epoch=0.127, train_loss=0.396, val_loss_step=0.118]#015Epoch 2:  66%|██████▌   | 5010/7585 [09:1

[34ms=0.367, val_loss_step=0.118]#015Epoch 2:  85%|████████▌ | 6450/7585 [11:52<02:05,  9.05it/s, loss=0.716, v_num=1, val_loss_epoch=0.127, train_loss=0.339, val_loss_step=0.118]#015Epoch 2:  85%|████████▌ | 6480/7585 [11:56<02:02,  9.05it/s, loss=0.716, v_num=1, val_loss_epoch=0.127, train_loss=0.339, val_loss_step=0.118]#015Epoch 2:  85%|████████▌ | 6480/7585 [11:56<02:02,  9.05it/s, loss=0.549, v_num=1, val_loss_epoch=0.127, train_loss=0.253, val_loss_step=0.118]#015Epoch 2:  86%|████████▌ | 6510/7585 [11:59<01:58,  9.05it/s, loss=0.549, v_num=1, val_loss_epoch=0.127, train_loss=0.253, val_loss_step=0.118]#015Epoch 2:  86%|████████▌ | 6510/7585 [11:59<01:58,  9.05it/s, loss=0.731, v_num=1, val_loss_epoch=0.127, train_loss=1.990, val_loss_step=0.118]#015Epoch 2:  86%|████████▌ | 6540/7585 [12:02<01:55,  9.05it/s, loss=0.731, v_num=1, val_loss_epoch=0.127, train_loss=1.990, val_loss_step=0.118]#015Epoch 2:  86%|████████▌ | 6540/7585 [12:02<01:55,  9.05it/s, loss=0.728, v_num=1, val_

[34m#015Validating:  36%|███▌      | 150/415 [00:14<00:24, 10.75it/s]#033[A#015Epoch 2:  97%|█████████▋| 7350/7585 [13:26<00:25,  9.11it/s, loss=0.616, v_num=1, val_loss_epoch=0.127, train_loss=0.746, val_loss_step=0.118][0m
[34m#015Validating:  43%|████▎     | 180/415 [00:16<00:22, 10.65it/s]#033[A#015Epoch 2:  97%|█████████▋| 7380/7585 [13:29<00:22,  9.11it/s, loss=0.616, v_num=1, val_loss_epoch=0.127, train_loss=0.746, val_loss_step=0.118][0m
[34m#015Validating:  51%|█████     | 210/415 [00:19<00:19, 10.50it/s]#033[A#015Epoch 2:  98%|█████████▊| 7410/7585 [13:32<00:19,  9.12it/s, loss=0.616, v_num=1, val_loss_epoch=0.127, train_loss=0.746, val_loss_step=0.118][0m
[34m#015Validating:  58%|█████▊    | 240/415 [00:22<00:16, 10.44it/s]#033[A#015Epoch 2:  98%|█████████▊| 7440/7585 [13:35<00:15,  9.12it/s, loss=0.616, v_num=1, val_loss_epoch=0.127, train_loss=0.746, val_loss_step=0.118][0m
[34m#015Validating:  65%|██████▌   | 270/415 [00:25<00:14, 10.32it/s]#033[A#015Epoch 2:  98

[34m loss=0.516, v_num=1, val_loss_epoch=0.121, train_loss=0.398, val_loss_step=0.117]#015Epoch 3:  22%|██▏       | 1680/7585 [03:05<10:51,  9.06it/s, loss=0.605, v_num=1, val_loss_epoch=0.121, train_loss=0.224, val_loss_step=0.117]#015Epoch 3:  23%|██▎       | 1710/7585 [03:08<10:48,  9.06it/s, loss=0.605, v_num=1, val_loss_epoch=0.121, train_loss=0.224, val_loss_step=0.117]#015Epoch 3:  23%|██▎       | 1710/7585 [03:08<10:48,  9.06it/s, loss=0.564, v_num=1, val_loss_epoch=0.121, train_loss=0.563, val_loss_step=0.117]#015Epoch 3:  23%|██▎       | 1740/7585 [03:11<10:44,  9.06it/s, loss=0.564, v_num=1, val_loss_epoch=0.121, train_loss=0.563, val_loss_step=0.117]#015Epoch 3:  23%|██▎       | 1740/7585 [03:11<10:44,  9.06it/s, loss=0.577, v_num=1, val_loss_epoch=0.121, train_loss=0.266, val_loss_step=0.117]#015Epoch 3:  23%|██▎       | 1770/7585 [03:15<10:41,  9.07it/s, loss=0.577, v_num=1, val_loss_epoch=0.121, train_loss=0.266, val_loss_step=0.117]#015Epoch 3:  23%|██▎       | 1770/75

[34m0.814, v_num=1, val_loss_epoch=0.121, train_loss=0.882, val_loss_step=0.117]#015Epoch 3:  44%|████▍     | 3330/7585 [06:07<07:49,  9.07it/s, loss=0.814, v_num=1, val_loss_epoch=0.121, train_loss=0.882, val_loss_step=0.117]#015Epoch 3:  44%|████▍     | 3330/7585 [06:07<07:49,  9.07it/s, loss=0.59, v_num=1, val_loss_epoch=0.121, train_loss=0.224, val_loss_step=0.117] #015Epoch 3:  44%|████▍     | 3360/7585 [06:10<07:46,  9.07it/s, loss=0.59, v_num=1, val_loss_epoch=0.121, train_loss=0.224, val_loss_step=0.117]#015Epoch 3:  44%|████▍     | 3360/7585 [06:10<07:46,  9.07it/s, loss=0.472, v_num=1, val_loss_epoch=0.121, train_loss=0.271, val_loss_step=0.117]#015Epoch 3:  45%|████▍     | 3390/7585 [06:13<07:42,  9.07it/s, loss=0.472, v_num=1, val_loss_epoch=0.121, train_loss=0.271, val_loss_step=0.117]#015Epoch 3:  45%|████▍     | 3390/7585 [06:13<07:42,  9.07it/s, loss=0.487, v_num=1, val_loss_epoch=0.121, train_loss=0.678, val_loss_step=0.117]#015Epoch 3:  45%|████▌     | 3420/7585 [06:

[34m=0.438, v_num=1, val_loss_epoch=0.121, train_loss=0.423, val_loss_step=0.117]#015Epoch 3:  65%|██████▍   | 4920/7585 [09:02<04:53,  9.07it/s, loss=0.438, v_num=1, val_loss_epoch=0.121, train_loss=0.423, val_loss_step=0.117]#015Epoch 3:  65%|██████▍   | 4920/7585 [09:02<04:53,  9.07it/s, loss=0.638, v_num=1, val_loss_epoch=0.121, train_loss=1.360, val_loss_step=0.117]#015Epoch 3:  65%|██████▌   | 4950/7585 [09:05<04:50,  9.07it/s, loss=0.638, v_num=1, val_loss_epoch=0.121, train_loss=1.360, val_loss_step=0.117]#015Epoch 3:  65%|██████▌   | 4950/7585 [09:05<04:50,  9.07it/s, loss=0.667, v_num=1, val_loss_epoch=0.121, train_loss=0.410, val_loss_step=0.117]#015Epoch 3:  66%|██████▌   | 4980/7585 [09:09<04:47,  9.07it/s, loss=0.667, v_num=1, val_loss_epoch=0.121, train_loss=0.410, val_loss_step=0.117]#015Epoch 3:  66%|██████▌   | 4980/7585 [09:09<04:47,  9.07it/s, loss=0.534, v_num=1, val_loss_epoch=0.121, train_loss=0.585, val_loss_step=0.117]#015Epoch 3:  66%|██████▌   | 5010/7585 [0

[34main_loss=0.343, val_loss_step=0.117]#015Epoch 3:  85%|████████▌ | 6450/7585 [11:51<02:05,  9.07it/s, loss=0.385, v_num=1, val_loss_epoch=0.121, train_loss=0.426, val_loss_step=0.117]#015Epoch 3:  85%|████████▌ | 6480/7585 [11:54<02:01,  9.07it/s, loss=0.385, v_num=1, val_loss_epoch=0.121, train_loss=0.426, val_loss_step=0.117]#015Epoch 3:  85%|████████▌ | 6480/7585 [11:54<02:01,  9.07it/s, loss=0.653, v_num=1, val_loss_epoch=0.121, train_loss=0.809, val_loss_step=0.117]#015Epoch 3:  86%|████████▌ | 6510/7585 [11:58<01:58,  9.07it/s, loss=0.653, v_num=1, val_loss_epoch=0.121, train_loss=0.809, val_loss_step=0.117]#015Epoch 3:  86%|████████▌ | 6510/7585 [11:58<01:58,  9.07it/s, loss=0.544, v_num=1, val_loss_epoch=0.121, train_loss=0.443, val_loss_step=0.117]#015Epoch 3:  86%|████████▌ | 6540/7585 [12:01<01:55,  9.07it/s, loss=0.544, v_num=1, val_loss_epoch=0.121, train_loss=0.443, val_loss_step=0.117]#015Epoch 3:  86%|████████▌ | 6540/7585 [12:01<01:55,  9.07it/s, loss=0.505, v_num=

[34m#015Validating:  36%|███▌      | 150/415 [00:13<00:24, 10.76it/s]#033[A#015Epoch 3:  97%|█████████▋| 7350/7585 [13:25<00:25,  9.13it/s, loss=0.452, v_num=1, val_loss_epoch=0.121, train_loss=0.358, val_loss_step=0.117][0m
[34m#015Validating:  43%|████▎     | 180/415 [00:16<00:22, 10.59it/s]#033[A#015Epoch 3:  97%|█████████▋| 7380/7585 [13:28<00:22,  9.13it/s, loss=0.452, v_num=1, val_loss_epoch=0.121, train_loss=0.358, val_loss_step=0.117][0m
[34m#015Validating:  51%|█████     | 210/415 [00:19<00:19, 10.47it/s]#033[A#015Epoch 3:  98%|█████████▊| 7410/7585 [13:30<00:19,  9.14it/s, loss=0.452, v_num=1, val_loss_epoch=0.121, train_loss=0.358, val_loss_step=0.117][0m
[34m#015Validating:  58%|█████▊    | 240/415 [00:22<00:16, 10.42it/s]#033[A#015Epoch 3:  98%|█████████▊| 7440/7585 [13:33<00:15,  9.14it/s, loss=0.452, v_num=1, val_loss_epoch=0.121, train_loss=0.358, val_loss_step=0.117][0m
[34m#015Validating:  65%|██████▌   | 270/415 [00:25<00:14, 10.33it/s]#033[A#015Epoch 3:  98

[34m=0.119, train_loss=0.342, val_loss_step=0.0925]#015Epoch 4:  22%|██▏       | 1680/7585 [03:05<10:52,  9.05it/s, loss=0.444, v_num=1, val_loss_epoch=0.119, train_loss=0.342, val_loss_step=0.0925]#015Epoch 4:  22%|██▏       | 1680/7585 [03:05<10:52,  9.05it/s, loss=0.64, v_num=1, val_loss_epoch=0.119, train_loss=0.539, val_loss_step=0.0925] #015Epoch 4:  23%|██▎       | 1710/7585 [03:08<10:48,  9.05it/s, loss=0.64, v_num=1, val_loss_epoch=0.119, train_loss=0.539, val_loss_step=0.0925]#015Epoch 4:  23%|██▎       | 1710/7585 [03:08<10:48,  9.05it/s, loss=0.563, v_num=1, val_loss_epoch=0.119, train_loss=0.236, val_loss_step=0.0925]#015Epoch 4:  23%|██▎       | 1740/7585 [03:12<10:45,  9.05it/s, loss=0.563, v_num=1, val_loss_epoch=0.119, train_loss=0.236, val_loss_step=0.0925]#015Epoch 4:  23%|██▎       | 1740/7585 [03:12<10:45,  9.05it/s, loss=0.442, v_num=1, val_loss_epoch=0.119, train_loss=0.979, val_loss_step=0.0925]#015Epoch 4:  23%|██▎       | 1770/7585 [03:15<10:42,  9.05it/s, lo

[34mch 4:  44%|████▎     | 3300/7585 [06:04<07:52,  9.06it/s, loss=0.397, v_num=1, val_loss_epoch=0.119, train_loss=0.268, val_loss_step=0.0925]#015Epoch 4:  44%|████▎     | 3300/7585 [06:04<07:52,  9.06it/s, loss=0.499, v_num=1, val_loss_epoch=0.119, train_loss=0.421, val_loss_step=0.0925]#015Epoch 4:  44%|████▍     | 3330/7585 [06:07<07:49,  9.06it/s, loss=0.499, v_num=1, val_loss_epoch=0.119, train_loss=0.421, val_loss_step=0.0925]#015Epoch 4:  44%|████▍     | 3330/7585 [06:07<07:49,  9.06it/s, loss=0.528, v_num=1, val_loss_epoch=0.119, train_loss=0.596, val_loss_step=0.0925]#015Epoch 4:  44%|████▍     | 3360/7585 [06:10<07:46,  9.06it/s, loss=0.528, v_num=1, val_loss_epoch=0.119, train_loss=0.596, val_loss_step=0.0925]#015Epoch 4:  44%|████▍     | 3360/7585 [06:10<07:46,  9.06it/s, loss=0.44, v_num=1, val_loss_epoch=0.119, train_loss=0.320, val_loss_step=0.0925] #015Epoch 4:  45%|████▍     | 3390/7585 [06:14<07:43,  9.06it/s, loss=0.44, v_num=1, val_loss_epoch=0.119, train_loss=0.

[34m.06it/s, loss=0.485, v_num=1, val_loss_epoch=0.119, train_loss=0.628, val_loss_step=0.0925]#015Epoch 4:  64%|██████▍   | 4890/7585 [08:59<04:57,  9.06it/s, loss=0.485, v_num=1, val_loss_epoch=0.119, train_loss=0.628, val_loss_step=0.0925]#015Epoch 4:  64%|██████▍   | 4890/7585 [08:59<04:57,  9.06it/s, loss=0.673, v_num=1, val_loss_epoch=0.119, train_loss=0.235, val_loss_step=0.0925]#015Epoch 4:  65%|██████▍   | 4920/7585 [09:02<04:54,  9.06it/s, loss=0.673, v_num=1, val_loss_epoch=0.119, train_loss=0.235, val_loss_step=0.0925]#015Epoch 4:  65%|██████▍   | 4920/7585 [09:02<04:54,  9.06it/s, loss=0.443, v_num=1, val_loss_epoch=0.119, train_loss=0.434, val_loss_step=0.0925]#015Epoch 4:  65%|██████▌   | 4950/7585 [09:06<04:50,  9.06it/s, loss=0.443, v_num=1, val_loss_epoch=0.119, train_loss=0.434, val_loss_step=0.0925]#015Epoch 4:  65%|██████▌   | 4950/7585 [09:06<04:50,  9.06it/s, loss=0.566, v_num=1, val_loss_epoch=0.119, train_loss=0.448, val_loss_step=0.0925]#015Epoch 4:  66%|████

[34m████████▍ | 6420/7585 [11:48<02:08,  9.07it/s, loss=0.476, v_num=1, val_loss_epoch=0.119, train_loss=0.339, val_loss_step=0.0925]#015Epoch 4:  85%|████████▍ | 6420/7585 [11:48<02:08,  9.07it/s, loss=0.547, v_num=1, val_loss_epoch=0.119, train_loss=0.733, val_loss_step=0.0925]#015Epoch 4:  85%|████████▌ | 6450/7585 [11:51<02:05,  9.07it/s, loss=0.547, v_num=1, val_loss_epoch=0.119, train_loss=0.733, val_loss_step=0.0925]#015Epoch 4:  85%|████████▌ | 6450/7585 [11:51<02:05,  9.07it/s, loss=0.586, v_num=1, val_loss_epoch=0.119, train_loss=0.436, val_loss_step=0.0925]#015Epoch 4:  85%|████████▌ | 6480/7585 [11:54<02:01,  9.07it/s, loss=0.586, v_num=1, val_loss_epoch=0.119, train_loss=0.436, val_loss_step=0.0925]#015Epoch 4:  85%|████████▌ | 6480/7585 [11:54<02:01,  9.07it/s, loss=0.539, v_num=1, val_loss_epoch=0.119, train_loss=1.100, val_loss_step=0.0925]#015Epoch 4:  86%|████████▌ | 6510/7585 [11:57<01:58,  9.07it/s, loss=0.539, v_num=1, val_loss_epoch=0.119, train_loss=1.100, val_l

[34m#015Validating:  36%|███▌      | 150/415 [00:13<00:24, 10.78it/s]#033[A#015Epoch 4:  97%|█████████▋| 7350/7585 [13:25<00:25,  9.13it/s, loss=0.503, v_num=1, val_loss_epoch=0.119, train_loss=0.444, val_loss_step=0.0925][0m
[34m#015Validating:  43%|████▎     | 180/415 [00:16<00:21, 10.69it/s]#033[A#015Epoch 4:  97%|█████████▋| 7380/7585 [13:28<00:22,  9.13it/s, loss=0.503, v_num=1, val_loss_epoch=0.119, train_loss=0.444, val_loss_step=0.0925][0m
[34m#015Validating:  51%|█████     | 210/415 [00:19<00:19, 10.53it/s]#033[A#015Epoch 4:  98%|█████████▊| 7410/7585 [13:31<00:19,  9.14it/s, loss=0.503, v_num=1, val_loss_epoch=0.119, train_loss=0.444, val_loss_step=0.0925][0m
[34m#015Validating:  58%|█████▊    | 240/415 [00:22<00:16, 10.45it/s]#033[A#015Epoch 4:  98%|█████████▊| 7440/7585 [13:33<00:15,  9.14it/s, loss=0.503, v_num=1, val_loss_epoch=0.119, train_loss=0.444, val_loss_step=0.0925][0m
[34m#015Validating:  65%|██████▌   | 270/415 [00:25<00:13, 10.36it/s]#033[A#015Epoch 4:

[34m17, train_loss=0.464, val_loss_step=0.0823]#015Epoch 5:  22%|██▏       | 1680/7585 [03:05<10:53,  9.04it/s, loss=0.466, v_num=1, val_loss_epoch=0.117, train_loss=0.464, val_loss_step=0.0823]#015Epoch 5:  22%|██▏       | 1680/7585 [03:05<10:53,  9.04it/s, loss=0.517, v_num=1, val_loss_epoch=0.117, train_loss=0.361, val_loss_step=0.0823]#015Epoch 5:  23%|██▎       | 1710/7585 [03:09<10:50,  9.03it/s, loss=0.517, v_num=1, val_loss_epoch=0.117, train_loss=0.361, val_loss_step=0.0823]#015Epoch 5:  23%|██▎       | 1710/7585 [03:09<10:50,  9.03it/s, loss=0.494, v_num=1, val_loss_epoch=0.117, train_loss=0.346, val_loss_step=0.0823]#015Epoch 5:  23%|██▎       | 1740/7585 [03:12<10:46,  9.04it/s, loss=0.494, v_num=1, val_loss_epoch=0.117, train_loss=0.346, val_loss_step=0.0823]#015Epoch 5:  23%|██▎       | 1740/7585 [03:12<10:46,  9.04it/s, loss=0.441, v_num=1, val_loss_epoch=0.117, train_loss=0.612, val_loss_step=0.0823]#015Epoch 5:  23%|██▎       | 1770/7585 [03:15<10:43,  9.04it/s, loss=

[34m:  44%|████▎     | 3300/7585 [06:04<07:53,  9.05it/s, loss=0.484, v_num=1, val_loss_epoch=0.117, train_loss=0.307, val_loss_step=0.0823]#015Epoch 5:  44%|████▎     | 3300/7585 [06:04<07:53,  9.05it/s, loss=0.396, v_num=1, val_loss_epoch=0.117, train_loss=0.254, val_loss_step=0.0823]#015Epoch 5:  44%|████▍     | 3330/7585 [06:08<07:50,  9.05it/s, loss=0.396, v_num=1, val_loss_epoch=0.117, train_loss=0.254, val_loss_step=0.0823]#015Epoch 5:  44%|████▍     | 3330/7585 [06:08<07:50,  9.05it/s, loss=0.518, v_num=1, val_loss_epoch=0.117, train_loss=0.492, val_loss_step=0.0823]#015Epoch 5:  44%|████▍     | 3360/7585 [06:11<07:46,  9.05it/s, loss=0.518, v_num=1, val_loss_epoch=0.117, train_loss=0.492, val_loss_step=0.0823]#015Epoch 5:  44%|████▍     | 3360/7585 [06:11<07:46,  9.05it/s, loss=0.583, v_num=1, val_loss_epoch=0.117, train_loss=0.180, val_loss_step=0.0823]#015Epoch 5:  45%|████▍     | 3390/7585 [06:14<07:43,  9.05it/s, loss=0.583, v_num=1, val_loss_epoch=0.117, train_loss=0.180

[34m.05it/s, loss=0.444, v_num=1, val_loss_epoch=0.117, train_loss=0.178, val_loss_step=0.0823]#015Epoch 5:  64%|██████▍   | 4890/7585 [09:00<04:57,  9.05it/s, loss=0.444, v_num=1, val_loss_epoch=0.117, train_loss=0.178, val_loss_step=0.0823]#015Epoch 5:  64%|██████▍   | 4890/7585 [09:00<04:57,  9.05it/s, loss=0.558, v_num=1, val_loss_epoch=0.117, train_loss=0.350, val_loss_step=0.0823]#015Epoch 5:  65%|██████▍   | 4920/7585 [09:03<04:54,  9.05it/s, loss=0.558, v_num=1, val_loss_epoch=0.117, train_loss=0.350, val_loss_step=0.0823]#015Epoch 5:  65%|██████▍   | 4920/7585 [09:03<04:54,  9.05it/s, loss=0.342, v_num=1, val_loss_epoch=0.117, train_loss=0.191, val_loss_step=0.0823]#015Epoch 5:  65%|██████▌   | 4950/7585 [09:06<04:51,  9.05it/s, loss=0.342, v_num=1, val_loss_epoch=0.117, train_loss=0.191, val_loss_step=0.0823]#015Epoch 5:  65%|██████▌   | 4950/7585 [09:06<04:51,  9.05it/s, loss=0.452, v_num=1, val_loss_epoch=0.117, train_loss=0.251, val_loss_step=0.0823]#015Epoch 5:  66%|████

[34m███████▍ | 6420/7585 [11:49<02:08,  9.05it/s, loss=0.428, v_num=1, val_loss_epoch=0.117, train_loss=0.494, val_loss_step=0.0823]#015Epoch 5:  85%|████████▍ | 6420/7585 [11:49<02:08,  9.05it/s, loss=0.412, v_num=1, val_loss_epoch=0.117, train_loss=0.165, val_loss_step=0.0823]#015Epoch 5:  85%|████████▌ | 6450/7585 [11:52<02:05,  9.05it/s, loss=0.412, v_num=1, val_loss_epoch=0.117, train_loss=0.165, val_loss_step=0.0823]#015Epoch 5:  85%|████████▌ | 6450/7585 [11:52<02:05,  9.05it/s, loss=0.441, v_num=1, val_loss_epoch=0.117, train_loss=0.402, val_loss_step=0.0823]#015Epoch 5:  85%|████████▌ | 6480/7585 [11:55<02:02,  9.05it/s, loss=0.441, v_num=1, val_loss_epoch=0.117, train_loss=0.402, val_loss_step=0.0823]#015Epoch 5:  85%|████████▌ | 6480/7585 [11:55<02:02,  9.05it/s, loss=0.533, v_num=1, val_loss_epoch=0.117, train_loss=0.414, val_loss_step=0.0823]#015Epoch 5:  86%|████████▌ | 6510/7585 [11:59<01:58,  9.05it/s, loss=0.533, v_num=1, val_loss_epoch=0.117, train_loss=0.414, val_lo

[34m#015Validating:  36%|███▌      | 150/415 [00:13<00:24, 10.80it/s]#033[A#015Epoch 5:  97%|█████████▋| 7350/7585 [13:26<00:25,  9.12it/s, loss=0.546, v_num=1, val_loss_epoch=0.117, train_loss=0.657, val_loss_step=0.0823][0m
[34m#015Validating:  43%|████▎     | 180/415 [00:16<00:21, 10.69it/s]#033[A#015Epoch 5:  97%|█████████▋| 7380/7585 [13:29<00:22,  9.12it/s, loss=0.546, v_num=1, val_loss_epoch=0.117, train_loss=0.657, val_loss_step=0.0823][0m
[34m#015Validating:  51%|█████     | 210/415 [00:19<00:19, 10.52it/s]#033[A#015Epoch 5:  98%|█████████▊| 7410/7585 [13:32<00:19,  9.12it/s, loss=0.546, v_num=1, val_loss_epoch=0.117, train_loss=0.657, val_loss_step=0.0823][0m
[34m#015Validating:  58%|█████▊    | 240/415 [00:22<00:16, 10.45it/s]#033[A#015Epoch 5:  98%|█████████▊| 7440/7585 [13:35<00:15,  9.13it/s, loss=0.546, v_num=1, val_loss_epoch=0.117, train_loss=0.657, val_loss_step=0.0823][0m
[34m#015Validating:  65%|██████▌   | 270/415 [00:25<00:13, 10.36it/s]#033[A#015Epoch 5:

[34mss=0.551, v_num=0, val_loss_epoch=0.684, train_loss=0.525]#015Epoch 0:  16%|█▌        | 1950/12538 [03:35<19:32,  9.03it/s, loss=0.551, v_num=0, val_loss_epoch=0.684, train_loss=0.525]#015Epoch 0:  16%|█▌        | 1950/12538 [03:35<19:32,  9.03it/s, loss=0.537, v_num=0, val_loss_epoch=0.684, train_loss=0.573]#015Epoch 0:  16%|█▌        | 1980/12538 [03:39<19:28,  9.04it/s, loss=0.537, v_num=0, val_loss_epoch=0.684, train_loss=0.573]#015Epoch 0:  16%|█▌        | 1980/12538 [03:39<19:28,  9.04it/s, loss=0.55, v_num=0, val_loss_epoch=0.684, train_loss=0.630] #015Epoch 0:  16%|█▌        | 2010/12538 [03:42<19:25,  9.04it/s, loss=0.55, v_num=0, val_loss_epoch=0.684, train_loss=0.630]#015Epoch 0:  16%|█▌        | 2010/12538 [03:42<19:25,  9.04it/s, loss=0.571, v_num=0, val_loss_epoch=0.684, train_loss=0.521]#015Epoch 0:  16%|█▋        | 2040/12538 [03:45<19:21,  9.04it/s, loss=0.571, v_num=0, val_loss_epoch=0.684, train_loss=0.521]#015Epoch 0:  16%|█▋        | 2040/12538 [03:45<19:21,  

[34m04<16:00,  9.05it/s, loss=0.523, v_num=0, val_loss_epoch=0.684, train_loss=0.565]#015Epoch 0:  31%|███       | 3870/12538 [07:07<15:57,  9.05it/s, loss=0.523, v_num=0, val_loss_epoch=0.684, train_loss=0.565]#015Epoch 0:  31%|███       | 3870/12538 [07:07<15:57,  9.05it/s, loss=0.523, v_num=0, val_loss_epoch=0.684, train_loss=0.591]#015Epoch 0:  31%|███       | 3900/12538 [07:10<15:54,  9.05it/s, loss=0.523, v_num=0, val_loss_epoch=0.684, train_loss=0.591]#015Epoch 0:  31%|███       | 3900/12538 [07:10<15:54,  9.05it/s, loss=0.521, v_num=0, val_loss_epoch=0.684, train_loss=0.534]#015Epoch 0:  31%|███▏      | 3930/12538 [07:14<15:50,  9.05it/s, loss=0.521, v_num=0, val_loss_epoch=0.684, train_loss=0.534]#015Epoch 0:  31%|███▏      | 3930/12538 [07:14<15:50,  9.05it/s, loss=0.511, v_num=0, val_loss_epoch=0.684, train_loss=0.528]#015Epoch 0:  32%|███▏      | 3960/12538 [07:17<15:47,  9.05it/s, loss=0.511, v_num=0, val_loss_epoch=0.684, train_loss=0.528]#015Epoch 0:  32%|███▏      | 39

[34m54]#015Epoch 0:  46%|████▌     | 5730/12538 [10:32<12:31,  9.06it/s, loss=0.525, v_num=0, val_loss_epoch=0.684, train_loss=0.554]#015Epoch 0:  46%|████▌     | 5730/12538 [10:32<12:31,  9.06it/s, loss=0.503, v_num=0, val_loss_epoch=0.684, train_loss=0.486]#015Epoch 0:  46%|████▌     | 5760/12538 [10:35<12:28,  9.06it/s, loss=0.503, v_num=0, val_loss_epoch=0.684, train_loss=0.486]#015Epoch 0:  46%|████▌     | 5760/12538 [10:35<12:28,  9.06it/s, loss=0.508, v_num=0, val_loss_epoch=0.684, train_loss=0.536]#015Epoch 0:  46%|████▌     | 5790/12538 [10:39<12:24,  9.06it/s, loss=0.508, v_num=0, val_loss_epoch=0.684, train_loss=0.536]#015Epoch 0:  46%|████▌     | 5790/12538 [10:39<12:24,  9.06it/s, loss=0.509, v_num=0, val_loss_epoch=0.684, train_loss=0.484]#015Epoch 0:  46%|████▋     | 5820/12538 [10:42<12:21,  9.06it/s, loss=0.509, v_num=0, val_loss_epoch=0.684, train_loss=0.484]#015Epoch 0:  46%|████▋     | 5820/12538 [10:42<12:21,  9.06it/s, loss=0.537, v_num=0, val_loss_epoch=0.684, t

[34m%|██████    | 7560/12538 [13:54<09:09,  9.06it/s, loss=0.512, v_num=0, val_loss_epoch=0.684, train_loss=0.406]#015Epoch 0:  60%|██████    | 7560/12538 [13:54<09:09,  9.06it/s, loss=0.521, v_num=0, val_loss_epoch=0.684, train_loss=0.475]#015Epoch 0:  61%|██████    | 7590/12538 [13:57<09:06,  9.06it/s, loss=0.521, v_num=0, val_loss_epoch=0.684, train_loss=0.475]#015Epoch 0:  61%|██████    | 7590/12538 [13:57<09:06,  9.06it/s, loss=0.504, v_num=0, val_loss_epoch=0.684, train_loss=0.645]#015Epoch 0:  61%|██████    | 7620/12538 [14:01<09:02,  9.06it/s, loss=0.504, v_num=0, val_loss_epoch=0.684, train_loss=0.645]#015Epoch 0:  61%|██████    | 7620/12538 [14:01<09:02,  9.06it/s, loss=0.502, v_num=0, val_loss_epoch=0.684, train_loss=0.519]#015Epoch 0:  61%|██████    | 7650/12538 [14:04<08:59,  9.06it/s, loss=0.502, v_num=0, val_loss_epoch=0.684, train_loss=0.519]#015Epoch 0:  61%|██████    | 7650/12538 [14:04<08:59,  9.06it/s, loss=0.504, v_num=0, val_loss_epoch=0.684, train_loss=0.454]#01

[34m54,  9.06it/s, loss=0.516, v_num=0, val_loss_epoch=0.684, train_loss=0.528]#015Epoch 0:  75%|███████▍  | 9360/12538 [17:13<05:50,  9.06it/s, loss=0.516, v_num=0, val_loss_epoch=0.684, train_loss=0.528]#015Epoch 0:  75%|███████▍  | 9360/12538 [17:13<05:50,  9.06it/s, loss=0.501, v_num=0, val_loss_epoch=0.684, train_loss=0.532]#015Epoch 0:  75%|███████▍  | 9390/12538 [17:16<05:47,  9.06it/s, loss=0.501, v_num=0, val_loss_epoch=0.684, train_loss=0.532]#015Epoch 0:  75%|███████▍  | 9390/12538 [17:16<05:47,  9.06it/s, loss=0.487, v_num=0, val_loss_epoch=0.684, train_loss=0.417]#015Epoch 0:  75%|███████▌  | 9420/12538 [17:19<05:44,  9.06it/s, loss=0.487, v_num=0, val_loss_epoch=0.684, train_loss=0.417]#015Epoch 0:  75%|███████▌  | 9420/12538 [17:19<05:44,  9.06it/s, loss=0.496, v_num=0, val_loss_epoch=0.684, train_loss=0.600]#015Epoch 0:  75%|███████▌  | 9450/12538 [17:23<05:40,  9.06it/s, loss=0.496, v_num=0, val_loss_epoch=0.684, train_loss=0.600]#015Epoch 0:  75%|███████▌  | 9450/125

[34m#015Epoch 0:  89%|████████▊ | 11100/12538 [20:25<02:38,  9.06it/s, loss=0.506, v_num=0, val_loss_epoch=0.684, train_loss=0.519]#015Epoch 0:  89%|████████▊ | 11100/12538 [20:25<02:38,  9.06it/s, loss=0.506, v_num=0, val_loss_epoch=0.684, train_loss=0.528]#015Epoch 0:  89%|████████▉ | 11130/12538 [20:28<02:35,  9.06it/s, loss=0.506, v_num=0, val_loss_epoch=0.684, train_loss=0.528]#015Epoch 0:  89%|████████▉ | 11130/12538 [20:28<02:35,  9.06it/s, loss=0.484, v_num=0, val_loss_epoch=0.684, train_loss=0.568]#015Epoch 0:  89%|████████▉ | 11160/12538 [20:31<02:32,  9.06it/s, loss=0.484, v_num=0, val_loss_epoch=0.684, train_loss=0.568]#015Epoch 0:  89%|████████▉ | 11160/12538 [20:31<02:32,  9.06it/s, loss=0.492, v_num=0, val_loss_epoch=0.684, train_loss=0.343]#015Epoch 0:  89%|████████▉ | 11190/12538 [20:34<02:28,  9.06it/s, loss=0.492, v_num=0, val_loss_epoch=0.684, train_loss=0.343]#015Epoch 0:  89%|████████▉ | 11190/12538 [20:34<02:28,  9.06it/s, loss=0.491, v_num=0, val_loss_epoch=0.6

[34m#015Validating:  50%|█████     | 330/660 [00:30<00:31, 10.58it/s]#033[A#015Epoch 0:  97%|█████████▋| 12210/12538 [22:22<00:36,  9.10it/s, loss=0.513, v_num=0, val_loss_epoch=0.684, train_loss=0.476][0m
[34m#015Validating:  55%|█████▍    | 360/660 [00:33<00:28, 10.47it/s]#033[A#015Epoch 0:  98%|█████████▊| 12240/12538 [22:25<00:32,  9.10it/s, loss=0.513, v_num=0, val_loss_epoch=0.684, train_loss=0.476][0m
[34m#015Validating:  59%|█████▉    | 390/660 [00:36<00:25, 10.50it/s]#033[A#015Epoch 0:  98%|█████████▊| 12270/12538 [22:28<00:29,  9.10it/s, loss=0.513, v_num=0, val_loss_epoch=0.684, train_loss=0.476][0m
[34m#015Validating:  64%|██████▎   | 420/660 [00:39<00:23, 10.37it/s]#033[A#015Epoch 0:  98%|█████████▊| 12300/12538 [22:31<00:26,  9.10it/s, loss=0.513, v_num=0, val_loss_epoch=0.684, train_loss=0.476][0m
[34m#015Validating:  68%|██████▊   | 450/660 [00:42<00:20, 10.31it/s]#033[A#015Epoch 0:  98%|█████████▊| 12330/12538 [22:34<00:22,  9.11it/s, loss=0.513, v_num=0, val_

[34m3:05<20:01,  9.04it/s, loss=0.483, v_num=0, val_loss_epoch=0.523, train_loss=0.477, val_loss_step=1.410]#015Epoch 1:  13%|█▎        | 1680/12538 [03:05<20:01,  9.04it/s, loss=0.485, v_num=0, val_loss_epoch=0.523, train_loss=0.441, val_loss_step=1.410]#015Epoch 1:  14%|█▎        | 1710/12538 [03:09<19:57,  9.04it/s, loss=0.485, v_num=0, val_loss_epoch=0.523, train_loss=0.441, val_loss_step=1.410]#015Epoch 1:  14%|█▎        | 1710/12538 [03:09<19:57,  9.04it/s, loss=0.495, v_num=0, val_loss_epoch=0.523, train_loss=0.539, val_loss_step=1.410]#015Epoch 1:  14%|█▍        | 1740/12538 [03:12<19:54,  9.04it/s, loss=0.495, v_num=0, val_loss_epoch=0.523, train_loss=0.539, val_loss_step=1.410]#015Epoch 1:  14%|█▍        | 1740/12538 [03:12<19:54,  9.04it/s, loss=0.491, v_num=0, val_loss_epoch=0.523, train_loss=0.481, val_loss_step=1.410]#015Epoch 1:  14%|█▍        | 1770/12538 [03:15<19:50,  9.04it/s, loss=0.491, v_num=0, val_loss_epoch=0.523, train_loss=0.481, val_loss_step=1.410]#015Epoch

[34mloss=0.497, v_num=0, val_loss_epoch=0.523, train_loss=0.467, val_loss_step=1.410]#015Epoch 1:  27%|██▋       | 3330/12538 [06:08<16:57,  9.05it/s, loss=0.491, v_num=0, val_loss_epoch=0.523, train_loss=0.499, val_loss_step=1.410]#015Epoch 1:  27%|██▋       | 3360/12538 [06:11<16:54,  9.05it/s, loss=0.491, v_num=0, val_loss_epoch=0.523, train_loss=0.499, val_loss_step=1.410]#015Epoch 1:  27%|██▋       | 3360/12538 [06:11<16:54,  9.05it/s, loss=0.481, v_num=0, val_loss_epoch=0.523, train_loss=0.471, val_loss_step=1.410]#015Epoch 1:  27%|██▋       | 3390/12538 [06:14<16:51,  9.05it/s, loss=0.481, v_num=0, val_loss_epoch=0.523, train_loss=0.471, val_loss_step=1.410]#015Epoch 1:  27%|██▋       | 3390/12538 [06:14<16:51,  9.05it/s, loss=0.479, v_num=0, val_loss_epoch=0.523, train_loss=0.466, val_loss_step=1.410]#015Epoch 1:  27%|██▋       | 3420/12538 [06:17<16:47,  9.05it/s, loss=0.479, v_num=0, val_loss_epoch=0.523, train_loss=0.466, val_loss_step=1.410]#015Epoch 1:  27%|██▋       | 34

[34m58, val_loss_step=1.410]#015Epoch 1:  39%|███▉      | 4950/12538 [09:07<13:58,  9.04it/s, loss=0.485, v_num=0, val_loss_epoch=0.523, train_loss=0.411, val_loss_step=1.410]#015Epoch 1:  40%|███▉      | 4980/12538 [09:10<13:55,  9.04it/s, loss=0.485, v_num=0, val_loss_epoch=0.523, train_loss=0.411, val_loss_step=1.410]#015Epoch 1:  40%|███▉      | 4980/12538 [09:10<13:55,  9.04it/s, loss=0.492, v_num=0, val_loss_epoch=0.523, train_loss=0.451, val_loss_step=1.410]#015Epoch 1:  40%|███▉      | 5010/12538 [09:13<13:52,  9.05it/s, loss=0.492, v_num=0, val_loss_epoch=0.523, train_loss=0.451, val_loss_step=1.410]#015Epoch 1:  40%|███▉      | 5010/12538 [09:13<13:52,  9.05it/s, loss=0.469, v_num=0, val_loss_epoch=0.523, train_loss=0.463, val_loss_step=1.410]#015Epoch 1:  40%|████      | 5040/12538 [09:17<13:48,  9.04it/s, loss=0.469, v_num=0, val_loss_epoch=0.523, train_loss=0.463, val_loss_step=1.410]#015Epoch 1:  40%|████      | 5040/12538 [09:17<13:48,  9.04it/s, loss=0.495, v_num=0, va

[34m,  9.04it/s, loss=0.514, v_num=0, val_loss_epoch=0.523, train_loss=0.486, val_loss_step=1.410]#015Epoch 1:  52%|█████▏    | 6570/12538 [12:06<10:59,  9.05it/s, loss=0.514, v_num=0, val_loss_epoch=0.523, train_loss=0.486, val_loss_step=1.410]#015Epoch 1:  52%|█████▏    | 6570/12538 [12:06<10:59,  9.05it/s, loss=0.456, v_num=0, val_loss_epoch=0.523, train_loss=0.423, val_loss_step=1.410]#015Epoch 1:  53%|█████▎    | 6600/12538 [12:09<10:56,  9.05it/s, loss=0.456, v_num=0, val_loss_epoch=0.523, train_loss=0.423, val_loss_step=1.410]#015Epoch 1:  53%|█████▎    | 6600/12538 [12:09<10:56,  9.05it/s, loss=0.486, v_num=0, val_loss_epoch=0.523, train_loss=0.476, val_loss_step=1.410]#015Epoch 1:  53%|█████▎    | 6630/12538 [12:12<10:53,  9.05it/s, loss=0.486, v_num=0, val_loss_epoch=0.523, train_loss=0.476, val_loss_step=1.410]#015Epoch 1:  53%|█████▎    | 6630/12538 [12:12<10:53,  9.05it/s, loss=0.466, v_num=0, val_loss_epoch=0.523, train_loss=0.590, val_loss_step=1.410]#015Epoch 1:  53%|█

[34m%|██████▍   | 8130/12538 [14:59<08:07,  9.04it/s, loss=0.46, v_num=0, val_loss_epoch=0.523, train_loss=0.366, val_loss_step=1.410]#015Epoch 1:  65%|██████▍   | 8130/12538 [14:59<08:07,  9.04it/s, loss=0.496, v_num=0, val_loss_epoch=0.523, train_loss=0.501, val_loss_step=1.410]#015Epoch 1:  65%|██████▌   | 8160/12538 [15:02<08:04,  9.04it/s, loss=0.496, v_num=0, val_loss_epoch=0.523, train_loss=0.501, val_loss_step=1.410]#015Epoch 1:  65%|██████▌   | 8160/12538 [15:02<08:04,  9.04it/s, loss=0.486, v_num=0, val_loss_epoch=0.523, train_loss=0.481, val_loss_step=1.410]#015Epoch 1:  65%|██████▌   | 8190/12538 [15:05<08:00,  9.04it/s, loss=0.486, v_num=0, val_loss_epoch=0.523, train_loss=0.481, val_loss_step=1.410]#015Epoch 1:  65%|██████▌   | 8190/12538 [15:05<08:00,  9.04it/s, loss=0.472, v_num=0, val_loss_epoch=0.523, train_loss=0.431, val_loss_step=1.410]#015Epoch 1:  66%|██████▌   | 8220/12538 [15:08<07:57,  9.04it/s, loss=0.472, v_num=0, val_loss_epoch=0.523, train_loss=0.431, val

[34m��▋  | 9660/12538 [17:48<05:18,  9.04it/s, loss=0.445, v_num=0, val_loss_epoch=0.523, train_loss=0.487, val_loss_step=1.410]#015Epoch 1:  77%|███████▋  | 9690/12538 [17:51<05:14,  9.04it/s, loss=0.445, v_num=0, val_loss_epoch=0.523, train_loss=0.487, val_loss_step=1.410]#015Epoch 1:  77%|███████▋  | 9690/12538 [17:51<05:14,  9.04it/s, loss=0.5, v_num=0, val_loss_epoch=0.523, train_loss=0.522, val_loss_step=1.410]  #015Epoch 1:  78%|███████▊  | 9720/12538 [17:54<05:11,  9.04it/s, loss=0.5, v_num=0, val_loss_epoch=0.523, train_loss=0.522, val_loss_step=1.410]#015Epoch 1:  78%|███████▊  | 9720/12538 [17:54<05:11,  9.04it/s, loss=0.479, v_num=0, val_loss_epoch=0.523, train_loss=0.529, val_loss_step=1.410]#015Epoch 1:  78%|███████▊  | 9750/12538 [17:57<05:08,  9.04it/s, loss=0.479, v_num=0, val_loss_epoch=0.523, train_loss=0.529, val_loss_step=1.410]#015Epoch 1:  78%|███████▊  | 9750/12538 [17:57<05:08,  9.04it/s, loss=0.477, v_num=0, val_loss_epoch=0.523, train_loss=0.649, val_loss_st

[34m8 [20:37<02:29,  9.04it/s, loss=0.479, v_num=0, val_loss_epoch=0.523, train_loss=0.439, val_loss_step=1.410]#015Epoch 1:  89%|████████▉ | 11190/12538 [20:37<02:29,  9.04it/s, loss=0.473, v_num=0, val_loss_epoch=0.523, train_loss=0.562, val_loss_step=1.410]#015Epoch 1:  89%|████████▉ | 11220/12538 [20:40<02:25,  9.04it/s, loss=0.473, v_num=0, val_loss_epoch=0.523, train_loss=0.562, val_loss_step=1.410]#015Epoch 1:  89%|████████▉ | 11220/12538 [20:40<02:25,  9.04it/s, loss=0.479, v_num=0, val_loss_epoch=0.523, train_loss=0.451, val_loss_step=1.410]#015Epoch 1:  90%|████████▉ | 11250/12538 [20:43<02:22,  9.04it/s, loss=0.479, v_num=0, val_loss_epoch=0.523, train_loss=0.451, val_loss_step=1.410]#015Epoch 1:  90%|████████▉ | 11250/12538 [20:43<02:22,  9.04it/s, loss=0.487, v_num=0, val_loss_epoch=0.523, train_loss=0.399, val_loss_step=1.410]#015Epoch 1:  90%|████████▉ | 11280/12538 [20:47<02:19,  9.04it/s, loss=0.487, v_num=0, val_loss_epoch=0.523, train_loss=0.399, val_loss_step=1.410

[34m#015Validating:  32%|███▏      | 210/660 [00:19<00:41, 10.82it/s]#033[A#015Epoch 1:  96%|█████████▋| 12090/12538 [22:13<00:49,  9.06it/s, loss=0.453, v_num=0, val_loss_epoch=0.523, train_loss=0.444, val_loss_step=1.410][0m
[34m#015Validating:  36%|███▋      | 240/660 [00:22<00:38, 10.78it/s]#033[A#015Epoch 1:  97%|█████████▋| 12120/12538 [22:16<00:46,  9.07it/s, loss=0.453, v_num=0, val_loss_epoch=0.523, train_loss=0.444, val_loss_step=1.410][0m
[34m#015Validating:  41%|████      | 270/660 [00:25<00:36, 10.69it/s]#033[A#015Epoch 1:  97%|█████████▋| 12150/12538 [22:19<00:42,  9.07it/s, loss=0.453, v_num=0, val_loss_epoch=0.523, train_loss=0.444, val_loss_step=1.410][0m
[34m#015Validating:  45%|████▌     | 300/660 [00:28<00:34, 10.56it/s]#033[A#015Epoch 1:  97%|█████████▋| 12180/12538 [22:22<00:39,  9.07it/s, loss=0.453, v_num=0, val_loss_epoch=0.523, train_loss=0.444, val_loss_step=1.410][0m
[34m#015Validating:  50%|█████     | 330/660 [00:31<00:31, 10.45it/s]#033[A#015Epoc

[34m0:02,  9.03it/s, loss=0.477, v_num=0, val_loss_epoch=0.510, train_loss=0.554, val_loss_step=1.510]#015Epoch 2:  13%|█▎        | 1680/12538 [03:06<20:02,  9.03it/s, loss=0.489, v_num=0, val_loss_epoch=0.510, train_loss=0.439, val_loss_step=1.510]#015Epoch 2:  14%|█▎        | 1710/12538 [03:09<19:59,  9.03it/s, loss=0.489, v_num=0, val_loss_epoch=0.510, train_loss=0.439, val_loss_step=1.510]#015Epoch 2:  14%|█▎        | 1710/12538 [03:09<19:59,  9.03it/s, loss=0.484, v_num=0, val_loss_epoch=0.510, train_loss=0.537, val_loss_step=1.510]#015Epoch 2:  14%|█▍        | 1740/12538 [03:12<19:55,  9.03it/s, loss=0.484, v_num=0, val_loss_epoch=0.510, train_loss=0.537, val_loss_step=1.510]#015Epoch 2:  14%|█▍        | 1740/12538 [03:12<19:55,  9.03it/s, loss=0.456, v_num=0, val_loss_epoch=0.510, train_loss=0.484, val_loss_step=1.510]#015Epoch 2:  14%|█▍        | 1770/12538 [03:15<19:52,  9.03it/s, loss=0.456, v_num=0, val_loss_epoch=0.510, train_loss=0.484, val_loss_step=1.510]#015Epoch 2:  1

[34m, loss=0.462, v_num=0, val_loss_epoch=0.510, train_loss=0.389, val_loss_step=1.510]#015Epoch 2:  27%|██▋       | 3330/12538 [06:08<16:57,  9.05it/s, loss=0.488, v_num=0, val_loss_epoch=0.510, train_loss=0.492, val_loss_step=1.510]#015Epoch 2:  27%|██▋       | 3360/12538 [06:11<16:54,  9.05it/s, loss=0.488, v_num=0, val_loss_epoch=0.510, train_loss=0.492, val_loss_step=1.510]#015Epoch 2:  27%|██▋       | 3360/12538 [06:11<16:54,  9.05it/s, loss=0.478, v_num=0, val_loss_epoch=0.510, train_loss=0.347, val_loss_step=1.510]#015Epoch 2:  27%|██▋       | 3390/12538 [06:14<16:51,  9.05it/s, loss=0.478, v_num=0, val_loss_epoch=0.510, train_loss=0.347, val_loss_step=1.510]#015Epoch 2:  27%|██▋       | 3390/12538 [06:14<16:51,  9.05it/s, loss=0.47, v_num=0, val_loss_epoch=0.510, train_loss=0.501, val_loss_step=1.510] #015Epoch 2:  27%|██▋       | 3420/12538 [06:18<16:47,  9.05it/s, loss=0.47, v_num=0, val_loss_epoch=0.510, train_loss=0.501, val_loss_step=1.510]#015Epoch 2:  27%|██▋       | 3

[34mss=0.460, val_loss_step=1.510]#015Epoch 2:  39%|███▉      | 4950/12538 [09:07<13:58,  9.04it/s, loss=0.459, v_num=0, val_loss_epoch=0.510, train_loss=0.519, val_loss_step=1.510]#015Epoch 2:  40%|███▉      | 4980/12538 [09:10<13:55,  9.05it/s, loss=0.459, v_num=0, val_loss_epoch=0.510, train_loss=0.519, val_loss_step=1.510]#015Epoch 2:  40%|███▉      | 4980/12538 [09:10<13:55,  9.05it/s, loss=0.472, v_num=0, val_loss_epoch=0.510, train_loss=0.513, val_loss_step=1.510]#015Epoch 2:  40%|███▉      | 5010/12538 [09:13<13:52,  9.05it/s, loss=0.472, v_num=0, val_loss_epoch=0.510, train_loss=0.513, val_loss_step=1.510]#015Epoch 2:  40%|███▉      | 5010/12538 [09:13<13:52,  9.05it/s, loss=0.467, v_num=0, val_loss_epoch=0.510, train_loss=0.482, val_loss_step=1.510]#015Epoch 2:  40%|████      | 5040/12538 [09:17<13:48,  9.05it/s, loss=0.467, v_num=0, val_loss_epoch=0.510, train_loss=0.482, val_loss_step=1.510]#015Epoch 2:  40%|████      | 5040/12538 [09:17<13:48,  9.05it/s, loss=0.463, v_num

[34m12:02<11:03,  9.05it/s, loss=0.48, v_num=0, val_loss_epoch=0.510, train_loss=0.397, val_loss_step=1.510] #015Epoch 2:  52%|█████▏    | 6570/12538 [12:06<10:59,  9.05it/s, loss=0.48, v_num=0, val_loss_epoch=0.510, train_loss=0.397, val_loss_step=1.510]#015Epoch 2:  52%|█████▏    | 6570/12538 [12:06<10:59,  9.05it/s, loss=0.464, v_num=0, val_loss_epoch=0.510, train_loss=0.610, val_loss_step=1.510]#015Epoch 2:  53%|█████▎    | 6600/12538 [12:09<10:56,  9.05it/s, loss=0.464, v_num=0, val_loss_epoch=0.510, train_loss=0.610, val_loss_step=1.510]#015Epoch 2:  53%|█████▎    | 6600/12538 [12:09<10:56,  9.05it/s, loss=0.445, v_num=0, val_loss_epoch=0.510, train_loss=0.404, val_loss_step=1.510]#015Epoch 2:  53%|█████▎    | 6630/12538 [12:12<10:53,  9.05it/s, loss=0.445, v_num=0, val_loss_epoch=0.510, train_loss=0.404, val_loss_step=1.510]#015Epoch 2:  53%|█████▎    | 6630/12538 [12:12<10:53,  9.05it/s, loss=0.461, v_num=0, val_loss_epoch=0.510, train_loss=0.477, val_loss_step=1.510]#015Epoch

[34mch 2:  65%|██████▍   | 8130/12538 [14:58<08:07,  9.05it/s, loss=0.487, v_num=0, val_loss_epoch=0.510, train_loss=0.468, val_loss_step=1.510]#015Epoch 2:  65%|██████▍   | 8130/12538 [14:58<08:07,  9.05it/s, loss=0.47, v_num=0, val_loss_epoch=0.510, train_loss=0.416, val_loss_step=1.510] #015Epoch 2:  65%|██████▌   | 8160/12538 [15:01<08:03,  9.05it/s, loss=0.47, v_num=0, val_loss_epoch=0.510, train_loss=0.416, val_loss_step=1.510]#015Epoch 2:  65%|██████▌   | 8160/12538 [15:01<08:03,  9.05it/s, loss=0.467, v_num=0, val_loss_epoch=0.510, train_loss=0.435, val_loss_step=1.510]#015Epoch 2:  65%|██████▌   | 8190/12538 [15:05<08:00,  9.05it/s, loss=0.467, v_num=0, val_loss_epoch=0.510, train_loss=0.435, val_loss_step=1.510]#015Epoch 2:  65%|██████▌   | 8190/12538 [15:05<08:00,  9.05it/s, loss=0.471, v_num=0, val_loss_epoch=0.510, train_loss=0.408, val_loss_step=1.510]#015Epoch 2:  66%|██████▌   | 8220/12538 [15:08<07:57,  9.05it/s, loss=0.471, v_num=0, val_loss_epoch=0.510, train_loss=0

[34m████▋  | 9660/12538 [17:47<05:18,  9.05it/s, loss=0.481, v_num=0, val_loss_epoch=0.510, train_loss=0.422, val_loss_step=1.510]#015Epoch 2:  77%|███████▋  | 9690/12538 [17:50<05:14,  9.05it/s, loss=0.481, v_num=0, val_loss_epoch=0.510, train_loss=0.422, val_loss_step=1.510]#015Epoch 2:  77%|███████▋  | 9690/12538 [17:50<05:14,  9.05it/s, loss=0.474, v_num=0, val_loss_epoch=0.510, train_loss=0.490, val_loss_step=1.510]#015Epoch 2:  78%|███████▊  | 9720/12538 [17:54<05:11,  9.05it/s, loss=0.474, v_num=0, val_loss_epoch=0.510, train_loss=0.490, val_loss_step=1.510]#015Epoch 2:  78%|███████▊  | 9720/12538 [17:54<05:11,  9.05it/s, loss=0.475, v_num=0, val_loss_epoch=0.510, train_loss=0.448, val_loss_step=1.510]#015Epoch 2:  78%|███████▊  | 9750/12538 [17:57<05:08,  9.05it/s, loss=0.475, v_num=0, val_loss_epoch=0.510, train_loss=0.448, val_loss_step=1.510]#015Epoch 2:  78%|███████▊  | 9750/12538 [17:57<05:08,  9.05it/s, loss=0.469, v_num=0, val_loss_epoch=0.510, train_loss=0.552, val_los

[34m11190/12538 [20:36<02:29,  9.05it/s, loss=0.453, v_num=0, val_loss_epoch=0.510, train_loss=0.435, val_loss_step=1.510]#015Epoch 2:  89%|████████▉ | 11190/12538 [20:36<02:29,  9.05it/s, loss=0.453, v_num=0, val_loss_epoch=0.510, train_loss=0.461, val_loss_step=1.510]#015Epoch 2:  89%|████████▉ | 11220/12538 [20:40<02:25,  9.05it/s, loss=0.453, v_num=0, val_loss_epoch=0.510, train_loss=0.461, val_loss_step=1.510]#015Epoch 2:  89%|████████▉ | 11220/12538 [20:40<02:25,  9.05it/s, loss=0.458, v_num=0, val_loss_epoch=0.510, train_loss=0.438, val_loss_step=1.510]#015Epoch 2:  90%|████████▉ | 11250/12538 [20:43<02:22,  9.05it/s, loss=0.458, v_num=0, val_loss_epoch=0.510, train_loss=0.438, val_loss_step=1.510]#015Epoch 2:  90%|████████▉ | 11250/12538 [20:43<02:22,  9.05it/s, loss=0.495, v_num=0, val_loss_epoch=0.510, train_loss=0.471, val_loss_step=1.510]#015Epoch 2:  90%|████████▉ | 11280/12538 [20:46<02:19,  9.05it/s, loss=0.495, v_num=0, val_loss_epoch=0.510, train_loss=0.471, val_loss_

[34m#015Validating:  41%|████      | 270/660 [00:25<00:36, 10.62it/s]#033[A#015Epoch 2:  97%|█████████▋| 12150/12538 [22:19<00:42,  9.07it/s, loss=0.482, v_num=0, val_loss_epoch=0.510, train_loss=0.541, val_loss_step=1.510][0m
[34m#015Validating:  45%|████▌     | 300/660 [00:28<00:34, 10.55it/s]#033[A#015Epoch 2:  97%|█████████▋| 12180/12538 [22:22<00:39,  9.07it/s, loss=0.482, v_num=0, val_loss_epoch=0.510, train_loss=0.541, val_loss_step=1.510][0m
[34m#015Validating:  50%|█████     | 330/660 [00:31<00:31, 10.44it/s]#033[A#015Epoch 2:  97%|█████████▋| 12210/12538 [22:25<00:36,  9.08it/s, loss=0.482, v_num=0, val_loss_epoch=0.510, train_loss=0.541, val_loss_step=1.510][0m
[34m#015Validating:  55%|█████▍    | 360/660 [00:34<00:28, 10.38it/s]#033[A#015Epoch 2:  98%|█████████▊| 12240/12538 [22:28<00:32,  9.08it/s, loss=0.482, v_num=0, val_loss_epoch=0.510, train_loss=0.541, val_loss_step=1.510][0m
[34m#015Validating:  59%|█████▉    | 390/660 [00:37<00:25, 10.41it/s]#033[A#015Epoc

[34m3:06<20:03,  9.02it/s, loss=0.48, v_num=0, val_loss_epoch=0.502, train_loss=0.405, val_loss_step=1.540]#015Epoch 3:  13%|█▎        | 1680/12538 [03:06<20:03,  9.02it/s, loss=0.463, v_num=0, val_loss_epoch=0.502, train_loss=0.418, val_loss_step=1.540]#015Epoch 3:  14%|█▎        | 1710/12538 [03:09<19:59,  9.03it/s, loss=0.463, v_num=0, val_loss_epoch=0.502, train_loss=0.418, val_loss_step=1.540]#015Epoch 3:  14%|█▎        | 1710/12538 [03:09<19:59,  9.03it/s, loss=0.459, v_num=0, val_loss_epoch=0.502, train_loss=0.448, val_loss_step=1.540]#015Epoch 3:  14%|█▍        | 1740/12538 [03:12<19:56,  9.02it/s, loss=0.459, v_num=0, val_loss_epoch=0.502, train_loss=0.448, val_loss_step=1.540]#015Epoch 3:  14%|█▍        | 1740/12538 [03:12<19:56,  9.02it/s, loss=0.471, v_num=0, val_loss_epoch=0.502, train_loss=0.469, val_loss_step=1.540]#015Epoch 3:  14%|█▍        | 1770/12538 [03:16<19:53,  9.02it/s, loss=0.471, v_num=0, val_loss_epoch=0.502, train_loss=0.469, val_loss_step=1.540]#015Epoch 

[34mit/s, loss=0.429, v_num=0, val_loss_epoch=0.502, train_loss=0.419, val_loss_step=1.540]#015Epoch 3:  27%|██▋       | 3330/12538 [06:08<16:59,  9.03it/s, loss=0.449, v_num=0, val_loss_epoch=0.502, train_loss=0.409, val_loss_step=1.540]#015Epoch 3:  27%|██▋       | 3360/12538 [06:11<16:56,  9.03it/s, loss=0.449, v_num=0, val_loss_epoch=0.502, train_loss=0.409, val_loss_step=1.540]#015Epoch 3:  27%|██▋       | 3360/12538 [06:11<16:56,  9.03it/s, loss=0.467, v_num=0, val_loss_epoch=0.502, train_loss=0.460, val_loss_step=1.540]#015Epoch 3:  27%|██▋       | 3390/12538 [06:15<16:52,  9.03it/s, loss=0.467, v_num=0, val_loss_epoch=0.502, train_loss=0.460, val_loss_step=1.540]#015Epoch 3:  27%|██▋       | 3390/12538 [06:15<16:52,  9.03it/s, loss=0.443, v_num=0, val_loss_epoch=0.502, train_loss=0.463, val_loss_step=1.540]#015Epoch 3:  27%|██▋       | 3420/12538 [06:18<16:49,  9.03it/s, loss=0.443, v_num=0, val_loss_epoch=0.502, train_loss=0.463, val_loss_step=1.540]#015Epoch 3:  27%|██▋     

[34moss=0.484, val_loss_step=1.540]#015Epoch 3:  39%|███▉      | 4950/12538 [09:07<13:59,  9.04it/s, loss=0.45, v_num=0, val_loss_epoch=0.502, train_loss=0.466, val_loss_step=1.540]#015Epoch 3:  40%|███▉      | 4980/12538 [09:11<13:56,  9.04it/s, loss=0.45, v_num=0, val_loss_epoch=0.502, train_loss=0.466, val_loss_step=1.540]#015Epoch 3:  40%|███▉      | 4980/12538 [09:11<13:56,  9.04it/s, loss=0.441, v_num=0, val_loss_epoch=0.502, train_loss=0.429, val_loss_step=1.540]#015Epoch 3:  40%|███▉      | 5010/12538 [09:14<13:53,  9.04it/s, loss=0.441, v_num=0, val_loss_epoch=0.502, train_loss=0.429, val_loss_step=1.540]#015Epoch 3:  40%|███▉      | 5010/12538 [09:14<13:53,  9.04it/s, loss=0.483, v_num=0, val_loss_epoch=0.502, train_loss=0.444, val_loss_step=1.540]#015Epoch 3:  40%|████      | 5040/12538 [09:17<13:49,  9.04it/s, loss=0.483, v_num=0, val_loss_epoch=0.502, train_loss=0.444, val_loss_step=1.540]#015Epoch 3:  40%|████      | 5040/12538 [09:17<13:49,  9.04it/s, loss=0.457, v_num=

[34m2:03<11:03,  9.04it/s, loss=0.455, v_num=0, val_loss_epoch=0.502, train_loss=0.458, val_loss_step=1.540]#015Epoch 3:  52%|█████▏    | 6570/12538 [12:06<11:00,  9.04it/s, loss=0.455, v_num=0, val_loss_epoch=0.502, train_loss=0.458, val_loss_step=1.540]#015Epoch 3:  52%|█████▏    | 6570/12538 [12:06<11:00,  9.04it/s, loss=0.476, v_num=0, val_loss_epoch=0.502, train_loss=0.372, val_loss_step=1.540]#015Epoch 3:  53%|█████▎    | 6600/12538 [12:10<10:56,  9.04it/s, loss=0.476, v_num=0, val_loss_epoch=0.502, train_loss=0.372, val_loss_step=1.540]#015Epoch 3:  53%|█████▎    | 6600/12538 [12:10<10:56,  9.04it/s, loss=0.433, v_num=0, val_loss_epoch=0.502, train_loss=0.361, val_loss_step=1.540]#015Epoch 3:  53%|█████▎    | 6630/12538 [12:13<10:53,  9.04it/s, loss=0.433, v_num=0, val_loss_epoch=0.502, train_loss=0.361, val_loss_step=1.540]#015Epoch 3:  53%|█████▎    | 6630/12538 [12:13<10:53,  9.04it/s, loss=0.463, v_num=0, val_loss_epoch=0.502, train_loss=0.438, val_loss_step=1.540]#015Epoch

[34m#015Epoch 3:  65%|██████▍   | 8130/12538 [15:00<08:08,  9.03it/s, loss=0.448, v_num=0, val_loss_epoch=0.502, train_loss=0.514, val_loss_step=1.540]#015Epoch 3:  65%|██████▍   | 8130/12538 [15:00<08:08,  9.03it/s, loss=0.435, v_num=0, val_loss_epoch=0.502, train_loss=0.456, val_loss_step=1.540]#015Epoch 3:  65%|██████▌   | 8160/12538 [15:03<08:04,  9.03it/s, loss=0.435, v_num=0, val_loss_epoch=0.502, train_loss=0.456, val_loss_step=1.540]#015Epoch 3:  65%|██████▌   | 8160/12538 [15:03<08:04,  9.03it/s, loss=0.451, v_num=0, val_loss_epoch=0.502, train_loss=0.463, val_loss_step=1.540]#015Epoch 3:  65%|██████▌   | 8190/12538 [15:06<08:01,  9.03it/s, loss=0.451, v_num=0, val_loss_epoch=0.502, train_loss=0.463, val_loss_step=1.540]#015Epoch 3:  65%|██████▌   | 8190/12538 [15:06<08:01,  9.03it/s, loss=0.477, v_num=0, val_loss_epoch=0.502, train_loss=0.410, val_loss_step=1.540]#015Epoch 3:  66%|██████▌   | 8220/12538 [15:10<07:58,  9.03it/s, loss=0.477, v_num=0, val_loss_epoch=0.502, trai

[34m�██████▋  | 9660/12538 [17:51<05:19,  9.02it/s, loss=0.45, v_num=0, val_loss_epoch=0.502, train_loss=0.374, val_loss_step=1.540] #015Epoch 3:  77%|███████▋  | 9690/12538 [17:54<05:15,  9.02it/s, loss=0.45, v_num=0, val_loss_epoch=0.502, train_loss=0.374, val_loss_step=1.540]#015Epoch 3:  77%|███████▋  | 9690/12538 [17:54<05:15,  9.02it/s, loss=0.468, v_num=0, val_loss_epoch=0.502, train_loss=0.425, val_loss_step=1.540]#015Epoch 3:  78%|███████▊  | 9720/12538 [17:58<05:12,  9.02it/s, loss=0.468, v_num=0, val_loss_epoch=0.502, train_loss=0.425, val_loss_step=1.540]#015Epoch 3:  78%|███████▊  | 9720/12538 [17:58<05:12,  9.02it/s, loss=0.457, v_num=0, val_loss_epoch=0.502, train_loss=0.365, val_loss_step=1.540]#015Epoch 3:  78%|███████▊  | 9750/12538 [18:01<05:09,  9.02it/s, loss=0.457, v_num=0, val_loss_epoch=0.502, train_loss=0.365, val_loss_step=1.540]#015Epoch 3:  78%|███████▊  | 9750/12538 [18:01<05:09,  9.02it/s, loss=0.461, v_num=0, val_loss_epoch=0.502, train_loss=0.616, val_l

[34m��▉ | 11190/12538 [20:41<02:29,  9.01it/s, loss=0.461, v_num=0, val_loss_epoch=0.502, train_loss=0.411, val_loss_step=1.540]#015Epoch 3:  89%|████████▉ | 11190/12538 [20:41<02:29,  9.01it/s, loss=0.463, v_num=0, val_loss_epoch=0.502, train_loss=0.509, val_loss_step=1.540]#015Epoch 3:  89%|████████▉ | 11220/12538 [20:44<02:26,  9.01it/s, loss=0.463, v_num=0, val_loss_epoch=0.502, train_loss=0.509, val_loss_step=1.540]#015Epoch 3:  89%|████████▉ | 11220/12538 [20:44<02:26,  9.01it/s, loss=0.448, v_num=0, val_loss_epoch=0.502, train_loss=0.415, val_loss_step=1.540]#015Epoch 3:  90%|████████▉ | 11250/12538 [20:48<02:22,  9.01it/s, loss=0.448, v_num=0, val_loss_epoch=0.502, train_loss=0.415, val_loss_step=1.540]#015Epoch 3:  90%|████████▉ | 11250/12538 [20:48<02:22,  9.01it/s, loss=0.451, v_num=0, val_loss_epoch=0.502, train_loss=0.471, val_loss_step=1.540]#015Epoch 3:  90%|████████▉ | 11280/12538 [20:51<02:19,  9.01it/s, loss=0.451, v_num=0, val_loss_epoch=0.502, train_loss=0.471, val

[34m#015Validating:  32%|███▏      | 210/660 [00:19<00:41, 10.91it/s]#033[A#015Epoch 3:  96%|█████████▋| 12090/12538 [22:18<00:49,  9.03it/s, loss=0.467, v_num=0, val_loss_epoch=0.502, train_loss=0.382, val_loss_step=1.540][0m
[34m#015Validating:  36%|███▋      | 240/660 [00:22<00:38, 10.88it/s]#033[A#015Epoch 3:  97%|█████████▋| 12120/12538 [22:20<00:46,  9.04it/s, loss=0.467, v_num=0, val_loss_epoch=0.502, train_loss=0.382, val_loss_step=1.540][0m
[34m#015Validating:  41%|████      | 270/660 [00:25<00:36, 10.80it/s]#033[A#015Epoch 3:  97%|█████████▋| 12150/12538 [22:23<00:42,  9.04it/s, loss=0.467, v_num=0, val_loss_epoch=0.502, train_loss=0.382, val_loss_step=1.540][0m
[34m#015Validating:  45%|████▌     | 300/660 [00:28<00:33, 10.67it/s]#033[A#015Epoch 3:  97%|█████████▋| 12180/12538 [22:26<00:39,  9.04it/s, loss=0.467, v_num=0, val_loss_epoch=0.502, train_loss=0.382, val_loss_step=1.540][0m
[34m#015Validating:  50%|█████     | 330/660 [00:30<00:31, 10.55it/s]#033[A#015Epoc

[34m, loss=0.462, v_num=1, val_loss_epoch=0.700, train_loss=0.429]#015Epoch 0:  16%|█▌        | 1950/12538 [03:36<19:33,  9.02it/s, loss=0.462, v_num=1, val_loss_epoch=0.700, train_loss=0.429]#015Epoch 0:  16%|█▌        | 1950/12538 [03:36<19:33,  9.02it/s, loss=0.435, v_num=1, val_loss_epoch=0.700, train_loss=0.443]#015Epoch 0:  16%|█▌        | 1980/12538 [03:39<19:29,  9.03it/s, loss=0.435, v_num=1, val_loss_epoch=0.700, train_loss=0.443]#015Epoch 0:  16%|█▌        | 1980/12538 [03:39<19:29,  9.03it/s, loss=0.439, v_num=1, val_loss_epoch=0.700, train_loss=0.464]#015Epoch 0:  16%|█▌        | 2010/12538 [03:42<19:26,  9.02it/s, loss=0.439, v_num=1, val_loss_epoch=0.700, train_loss=0.464]#015Epoch 0:  16%|█▌        | 2010/12538 [03:42<19:26,  9.02it/s, loss=0.436, v_num=1, val_loss_epoch=0.700, train_loss=0.473]#015Epoch 0:  16%|█▋        | 2040/12538 [03:45<19:23,  9.03it/s, loss=0.436, v_num=1, val_loss_epoch=0.700, train_loss=0.473]#015Epoch 0:  16%|█▋        | 2040/12538 [03:45<19:

[34m[07:06<16:05,  9.01it/s, loss=0.455, v_num=1, val_loss_epoch=0.700, train_loss=0.399]#015Epoch 0:  31%|███       | 3870/12538 [07:09<16:02,  9.01it/s, loss=0.455, v_num=1, val_loss_epoch=0.700, train_loss=0.399]#015Epoch 0:  31%|███       | 3870/12538 [07:09<16:02,  9.01it/s, loss=0.443, v_num=1, val_loss_epoch=0.700, train_loss=0.364]#015Epoch 0:  31%|███       | 3900/12538 [07:12<15:58,  9.01it/s, loss=0.443, v_num=1, val_loss_epoch=0.700, train_loss=0.364]#015Epoch 0:  31%|███       | 3900/12538 [07:12<15:58,  9.01it/s, loss=0.444, v_num=1, val_loss_epoch=0.700, train_loss=0.380]#015Epoch 0:  31%|███▏      | 3930/12538 [07:16<15:55,  9.01it/s, loss=0.444, v_num=1, val_loss_epoch=0.700, train_loss=0.380]#015Epoch 0:  31%|███▏      | 3930/12538 [07:16<15:55,  9.01it/s, loss=0.433, v_num=1, val_loss_epoch=0.700, train_loss=0.527]#015Epoch 0:  32%|███▏      | 3960/12538 [07:19<15:52,  9.01it/s, loss=0.433, v_num=1, val_loss_epoch=0.700, train_loss=0.527]#015Epoch 0:  32%|███▏      

[34m#015Epoch 0:  46%|████▌     | 5730/12538 [10:35<12:34,  9.02it/s, loss=0.42, v_num=1, val_loss_epoch=0.700, train_loss=0.386]#015Epoch 0:  46%|████▌     | 5730/12538 [10:35<12:34,  9.02it/s, loss=0.422, v_num=1, val_loss_epoch=0.700, train_loss=0.377]#015Epoch 0:  46%|████▌     | 5760/12538 [10:38<12:31,  9.02it/s, loss=0.422, v_num=1, val_loss_epoch=0.700, train_loss=0.377]#015Epoch 0:  46%|████▌     | 5760/12538 [10:38<12:31,  9.02it/s, loss=0.448, v_num=1, val_loss_epoch=0.700, train_loss=0.383]#015Epoch 0:  46%|████▌     | 5790/12538 [10:41<12:28,  9.02it/s, loss=0.448, v_num=1, val_loss_epoch=0.700, train_loss=0.383]#015Epoch 0:  46%|████▌     | 5790/12538 [10:41<12:28,  9.02it/s, loss=0.426, v_num=1, val_loss_epoch=0.700, train_loss=0.516]#015Epoch 0:  46%|████▋     | 5820/12538 [10:45<12:24,  9.02it/s, loss=0.426, v_num=1, val_loss_epoch=0.700, train_loss=0.516]#015Epoch 0:  46%|████▋     | 5820/12538 [10:45<12:24,  9.02it/s, loss=0.421, v_num=1, val_loss_epoch=0.700, train

[34m████    | 7560/12538 [13:57<09:11,  9.02it/s, loss=0.402, v_num=1, val_loss_epoch=0.700, train_loss=0.386]#015Epoch 0:  60%|██████    | 7560/12538 [13:57<09:11,  9.02it/s, loss=0.409, v_num=1, val_loss_epoch=0.700, train_loss=0.396]#015Epoch 0:  61%|██████    | 7590/12538 [14:00<09:08,  9.03it/s, loss=0.409, v_num=1, val_loss_epoch=0.700, train_loss=0.396]#015Epoch 0:  61%|██████    | 7590/12538 [14:00<09:08,  9.03it/s, loss=0.415, v_num=1, val_loss_epoch=0.700, train_loss=0.365]#015Epoch 0:  61%|██████    | 7620/12538 [14:04<09:04,  9.03it/s, loss=0.415, v_num=1, val_loss_epoch=0.700, train_loss=0.365]#015Epoch 0:  61%|██████    | 7620/12538 [14:04<09:04,  9.03it/s, loss=0.378, v_num=1, val_loss_epoch=0.700, train_loss=0.331]#015Epoch 0:  61%|██████    | 7650/12538 [14:07<09:01,  9.02it/s, loss=0.378, v_num=1, val_loss_epoch=0.700, train_loss=0.331]#015Epoch 0:  61%|██████    | 7650/12538 [14:07<09:01,  9.02it/s, loss=0.419, v_num=1, val_loss_epoch=0.700, train_loss=0.382]#015Epo

[34m3it/s, loss=0.419, v_num=1, val_loss_epoch=0.700, train_loss=0.406]#015Epoch 0:  75%|███████▍  | 9360/12538 [17:16<05:52,  9.03it/s, loss=0.419, v_num=1, val_loss_epoch=0.700, train_loss=0.406]#015Epoch 0:  75%|███████▍  | 9360/12538 [17:16<05:52,  9.03it/s, loss=0.393, v_num=1, val_loss_epoch=0.700, train_loss=0.398]#015Epoch 0:  75%|███████▍  | 9390/12538 [17:20<05:48,  9.03it/s, loss=0.393, v_num=1, val_loss_epoch=0.700, train_loss=0.398]#015Epoch 0:  75%|███████▍  | 9390/12538 [17:20<05:48,  9.03it/s, loss=0.416, v_num=1, val_loss_epoch=0.700, train_loss=0.506]#015Epoch 0:  75%|███████▌  | 9420/12538 [17:23<05:45,  9.03it/s, loss=0.416, v_num=1, val_loss_epoch=0.700, train_loss=0.506]#015Epoch 0:  75%|███████▌  | 9420/12538 [17:23<05:45,  9.03it/s, loss=0.384, v_num=1, val_loss_epoch=0.700, train_loss=0.349]#015Epoch 0:  75%|███████▌  | 9450/12538 [17:26<05:42,  9.03it/s, loss=0.384, v_num=1, val_loss_epoch=0.700, train_loss=0.349]#015Epoch 0:  75%|███████▌  | 9450/12538 [17:2

[34mpoch 0:  89%|████████▊ | 11100/12538 [20:29<02:39,  9.03it/s, loss=0.374, v_num=1, val_loss_epoch=0.700, train_loss=0.310]#015Epoch 0:  89%|████████▊ | 11100/12538 [20:29<02:39,  9.03it/s, loss=0.374, v_num=1, val_loss_epoch=0.700, train_loss=0.308]#015Epoch 0:  89%|████████▉ | 11130/12538 [20:33<02:35,  9.03it/s, loss=0.374, v_num=1, val_loss_epoch=0.700, train_loss=0.308]#015Epoch 0:  89%|████████▉ | 11130/12538 [20:33<02:35,  9.03it/s, loss=0.395, v_num=1, val_loss_epoch=0.700, train_loss=0.411]#015Epoch 0:  89%|████████▉ | 11160/12538 [20:36<02:32,  9.03it/s, loss=0.395, v_num=1, val_loss_epoch=0.700, train_loss=0.411]#015Epoch 0:  89%|████████▉ | 11160/12538 [20:36<02:32,  9.03it/s, loss=0.384, v_num=1, val_loss_epoch=0.700, train_loss=0.324]#015Epoch 0:  89%|████████▉ | 11190/12538 [20:39<02:29,  9.03it/s, loss=0.384, v_num=1, val_loss_epoch=0.700, train_loss=0.324]#015Epoch 0:  89%|████████▉ | 11190/12538 [20:39<02:29,  9.03it/s, loss=0.388, v_num=1, val_loss_epoch=0.700, t

[34m#015Validating:  50%|█████     | 330/660 [00:30<00:31, 10.56it/s]#033[A#015Epoch 0:  97%|█████████▋| 12210/12538 [22:27<00:36,  9.06it/s, loss=0.371, v_num=1, val_loss_epoch=0.700, train_loss=0.291][0m
[34m#015Validating:  55%|█████▍    | 360/660 [00:33<00:28, 10.42it/s]#033[A#015Epoch 0:  98%|█████████▊| 12240/12538 [22:30<00:32,  9.07it/s, loss=0.371, v_num=1, val_loss_epoch=0.700, train_loss=0.291][0m
[34m#015Validating:  59%|█████▉    | 390/660 [00:36<00:25, 10.44it/s]#033[A#015Epoch 0:  98%|█████████▊| 12270/12538 [22:33<00:29,  9.07it/s, loss=0.371, v_num=1, val_loss_epoch=0.700, train_loss=0.291][0m
[34m#015Validating:  64%|██████▎   | 420/660 [00:39<00:23, 10.36it/s]#033[A#015Epoch 0:  98%|█████████▊| 12300/12538 [22:35<00:26,  9.07it/s, loss=0.371, v_num=1, val_loss_epoch=0.700, train_loss=0.291][0m
[34m#015Validating:  68%|██████▊   | 450/660 [00:42<00:20, 10.33it/s]#033[A#015Epoch 0:  98%|█████████▊| 12330/12538 [22:38<00:22,  9.07it/s, loss=0.371, v_num=1, val_

[34m0:03,  9.02it/s, loss=0.352, v_num=1, val_loss_epoch=0.428, train_loss=0.310, val_loss_step=1.690]#015Epoch 1:  13%|█▎        | 1680/12538 [03:06<20:03,  9.02it/s, loss=0.394, v_num=1, val_loss_epoch=0.428, train_loss=0.331, val_loss_step=1.690]#015Epoch 1:  14%|█▎        | 1710/12538 [03:09<19:59,  9.03it/s, loss=0.394, v_num=1, val_loss_epoch=0.428, train_loss=0.331, val_loss_step=1.690]#015Epoch 1:  14%|█▎        | 1710/12538 [03:09<19:59,  9.03it/s, loss=0.357, v_num=1, val_loss_epoch=0.428, train_loss=0.399, val_loss_step=1.690]#015Epoch 1:  14%|█▍        | 1740/12538 [03:12<19:56,  9.03it/s, loss=0.357, v_num=1, val_loss_epoch=0.428, train_loss=0.399, val_loss_step=1.690]#015Epoch 1:  14%|█▍        | 1740/12538 [03:12<19:56,  9.03it/s, loss=0.364, v_num=1, val_loss_epoch=0.428, train_loss=0.401, val_loss_step=1.690]#015Epoch 1:  14%|█▍        | 1770/12538 [03:16<19:52,  9.03it/s, loss=0.364, v_num=1, val_loss_epoch=0.428, train_loss=0.401, val_loss_step=1.690]#015Epoch 1:  1

[34m, v_num=1, val_loss_epoch=0.428, train_loss=0.404, val_loss_step=1.690]#015Epoch 1:  27%|██▋       | 3330/12538 [06:08<16:58,  9.04it/s, loss=0.36, v_num=1, val_loss_epoch=0.428, train_loss=0.444, val_loss_step=1.690] #015Epoch 1:  27%|██▋       | 3360/12538 [06:11<16:55,  9.04it/s, loss=0.36, v_num=1, val_loss_epoch=0.428, train_loss=0.444, val_loss_step=1.690]#015Epoch 1:  27%|██▋       | 3360/12538 [06:11<16:55,  9.04it/s, loss=0.377, v_num=1, val_loss_epoch=0.428, train_loss=0.432, val_loss_step=1.690]#015Epoch 1:  27%|██▋       | 3390/12538 [06:15<16:52,  9.04it/s, loss=0.377, v_num=1, val_loss_epoch=0.428, train_loss=0.432, val_loss_step=1.690]#015Epoch 1:  27%|██▋       | 3390/12538 [06:15<16:52,  9.04it/s, loss=0.374, v_num=1, val_loss_epoch=0.428, train_loss=0.546, val_loss_step=1.690]#015Epoch 1:  27%|██▋       | 3420/12538 [06:18<16:48,  9.04it/s, loss=0.374, v_num=1, val_loss_epoch=0.428, train_loss=0.546, val_loss_step=1.690]#015Epoch 1:  27%|██▋       | 3420/12538 [0

[34ml_loss_step=1.690]#015Epoch 1:  39%|███▉      | 4950/12538 [09:07<13:59,  9.04it/s, loss=0.387, v_num=1, val_loss_epoch=0.428, train_loss=0.488, val_loss_step=1.690]#015Epoch 1:  40%|███▉      | 4980/12538 [09:10<13:55,  9.04it/s, loss=0.387, v_num=1, val_loss_epoch=0.428, train_loss=0.488, val_loss_step=1.690]#015Epoch 1:  40%|███▉      | 4980/12538 [09:10<13:55,  9.04it/s, loss=0.364, v_num=1, val_loss_epoch=0.428, train_loss=0.463, val_loss_step=1.690]#015Epoch 1:  40%|███▉      | 5010/12538 [09:14<13:52,  9.04it/s, loss=0.364, v_num=1, val_loss_epoch=0.428, train_loss=0.463, val_loss_step=1.690]#015Epoch 1:  40%|███▉      | 5010/12538 [09:14<13:52,  9.04it/s, loss=0.347, v_num=1, val_loss_epoch=0.428, train_loss=0.377, val_loss_step=1.690]#015Epoch 1:  40%|████      | 5040/12538 [09:17<13:49,  9.04it/s, loss=0.347, v_num=1, val_loss_epoch=0.428, train_loss=0.377, val_loss_step=1.690]#015Epoch 1:  40%|████      | 5040/12538 [09:17<13:49,  9.04it/s, loss=0.362, v_num=1, val_loss

[34m3,  9.04it/s, loss=0.338, v_num=1, val_loss_epoch=0.428, train_loss=0.453, val_loss_step=1.690]#015Epoch 1:  52%|█████▏    | 6570/12538 [12:06<10:59,  9.04it/s, loss=0.338, v_num=1, val_loss_epoch=0.428, train_loss=0.453, val_loss_step=1.690]#015Epoch 1:  52%|█████▏    | 6570/12538 [12:06<10:59,  9.04it/s, loss=0.367, v_num=1, val_loss_epoch=0.428, train_loss=0.359, val_loss_step=1.690]#015Epoch 1:  53%|█████▎    | 6600/12538 [12:09<10:56,  9.04it/s, loss=0.367, v_num=1, val_loss_epoch=0.428, train_loss=0.359, val_loss_step=1.690]#015Epoch 1:  53%|█████▎    | 6600/12538 [12:09<10:56,  9.04it/s, loss=0.371, v_num=1, val_loss_epoch=0.428, train_loss=0.318, val_loss_step=1.690]#015Epoch 1:  53%|█████▎    | 6630/12538 [12:13<10:53,  9.04it/s, loss=0.371, v_num=1, val_loss_epoch=0.428, train_loss=0.318, val_loss_step=1.690]#015Epoch 1:  53%|█████▎    | 6630/12538 [12:13<10:53,  9.04it/s, loss=0.356, v_num=1, val_loss_epoch=0.428, train_loss=0.379, val_loss_step=1.690]#015Epoch 1:  53%|

[34m:  65%|██████▍   | 8130/12538 [14:58<08:07,  9.04it/s, loss=0.38, v_num=1, val_loss_epoch=0.428, train_loss=0.398, val_loss_step=1.690]#015Epoch 1:  65%|██████▍   | 8130/12538 [14:58<08:07,  9.04it/s, loss=0.362, v_num=1, val_loss_epoch=0.428, train_loss=0.345, val_loss_step=1.690]#015Epoch 1:  65%|██████▌   | 8160/12538 [15:02<08:04,  9.04it/s, loss=0.362, v_num=1, val_loss_epoch=0.428, train_loss=0.345, val_loss_step=1.690]#015Epoch 1:  65%|██████▌   | 8160/12538 [15:02<08:04,  9.04it/s, loss=0.333, v_num=1, val_loss_epoch=0.428, train_loss=0.330, val_loss_step=1.690]#015Epoch 1:  65%|██████▌   | 8190/12538 [15:05<08:00,  9.04it/s, loss=0.333, v_num=1, val_loss_epoch=0.428, train_loss=0.330, val_loss_step=1.690]#015Epoch 1:  65%|██████▌   | 8190/12538 [15:05<08:00,  9.04it/s, loss=0.355, v_num=1, val_loss_epoch=0.428, train_loss=0.335, val_loss_step=1.690]#015Epoch 1:  66%|██████▌   | 8220/12538 [15:08<07:57,  9.05it/s, loss=0.355, v_num=1, val_loss_epoch=0.428, train_loss=0.335

[34m�███▋  | 9660/12538 [17:47<05:18,  9.05it/s, loss=0.356, v_num=1, val_loss_epoch=0.428, train_loss=0.301, val_loss_step=1.690]#015Epoch 1:  77%|███████▋  | 9690/12538 [17:51<05:14,  9.05it/s, loss=0.356, v_num=1, val_loss_epoch=0.428, train_loss=0.301, val_loss_step=1.690]#015Epoch 1:  77%|███████▋  | 9690/12538 [17:51<05:14,  9.05it/s, loss=0.35, v_num=1, val_loss_epoch=0.428, train_loss=0.296, val_loss_step=1.690] #015Epoch 1:  78%|███████▊  | 9720/12538 [17:54<05:11,  9.05it/s, loss=0.35, v_num=1, val_loss_epoch=0.428, train_loss=0.296, val_loss_step=1.690]#015Epoch 1:  78%|███████▊  | 9720/12538 [17:54<05:11,  9.05it/s, loss=0.342, v_num=1, val_loss_epoch=0.428, train_loss=0.316, val_loss_step=1.690]#015Epoch 1:  78%|███████▊  | 9750/12538 [17:57<05:08,  9.05it/s, loss=0.342, v_num=1, val_loss_epoch=0.428, train_loss=0.316, val_loss_step=1.690]#015Epoch 1:  78%|███████▊  | 9750/12538 [17:57<05:08,  9.05it/s, loss=0.357, v_num=1, val_loss_epoch=0.428, train_loss=0.368, val_loss

[34m/12538 [20:37<02:29,  9.04it/s, loss=0.335, v_num=1, val_loss_epoch=0.428, train_loss=0.245, val_loss_step=1.690]#015Epoch 1:  89%|████████▉ | 11190/12538 [20:37<02:29,  9.04it/s, loss=0.366, v_num=1, val_loss_epoch=0.428, train_loss=0.397, val_loss_step=1.690]#015Epoch 1:  89%|████████▉ | 11220/12538 [20:40<02:25,  9.04it/s, loss=0.366, v_num=1, val_loss_epoch=0.428, train_loss=0.397, val_loss_step=1.690]#015Epoch 1:  89%|████████▉ | 11220/12538 [20:40<02:25,  9.04it/s, loss=0.375, v_num=1, val_loss_epoch=0.428, train_loss=0.377, val_loss_step=1.690]#015Epoch 1:  90%|████████▉ | 11250/12538 [20:44<02:22,  9.04it/s, loss=0.375, v_num=1, val_loss_epoch=0.428, train_loss=0.377, val_loss_step=1.690]#015Epoch 1:  90%|████████▉ | 11250/12538 [20:44<02:22,  9.04it/s, loss=0.355, v_num=1, val_loss_epoch=0.428, train_loss=0.401, val_loss_step=1.690]#015Epoch 1:  90%|████████▉ | 11280/12538 [20:47<02:19,  9.04it/s, loss=0.355, v_num=1, val_loss_epoch=0.428, train_loss=0.401, val_loss_step=

[34m#015Validating:  32%|███▏      | 210/660 [00:19<00:41, 10.88it/s]#033[A#015Epoch 1:  96%|█████████▋| 12090/12538 [22:13<00:49,  9.06it/s, loss=0.366, v_num=1, val_loss_epoch=0.428, train_loss=0.367, val_loss_step=1.690][0m
[34m#015Validating:  36%|███▋      | 240/660 [00:22<00:38, 10.87it/s]#033[A#015Epoch 1:  97%|█████████▋| 12120/12538 [22:16<00:46,  9.07it/s, loss=0.366, v_num=1, val_loss_epoch=0.428, train_loss=0.367, val_loss_step=1.690][0m
[34m#015Validating:  41%|████      | 270/660 [00:25<00:36, 10.75it/s]#033[A#015Epoch 1:  97%|█████████▋| 12150/12538 [22:19<00:42,  9.07it/s, loss=0.366, v_num=1, val_loss_epoch=0.428, train_loss=0.367, val_loss_step=1.690][0m
[34m#015Validating:  45%|████▌     | 300/660 [00:28<00:33, 10.66it/s]#033[A#015Epoch 1:  97%|█████████▋| 12180/12538 [22:22<00:39,  9.07it/s, loss=0.366, v_num=1, val_loss_epoch=0.428, train_loss=0.367, val_loss_step=1.690][0m
[34m#015Validating:  50%|█████     | 330/660 [00:30<00:31, 10.51it/s]#033[A#015Epoc

[34m20:03,  9.02it/s, loss=0.351, v_num=1, val_loss_epoch=0.416, train_loss=0.357, val_loss_step=1.680]#015Epoch 2:  13%|█▎        | 1680/12538 [03:06<20:03,  9.02it/s, loss=0.322, v_num=1, val_loss_epoch=0.416, train_loss=0.419, val_loss_step=1.680]#015Epoch 2:  14%|█▎        | 1710/12538 [03:09<20:00,  9.02it/s, loss=0.322, v_num=1, val_loss_epoch=0.416, train_loss=0.419, val_loss_step=1.680]#015Epoch 2:  14%|█▎        | 1710/12538 [03:09<20:00,  9.02it/s, loss=0.307, v_num=1, val_loss_epoch=0.416, train_loss=0.276, val_loss_step=1.680]#015Epoch 2:  14%|█▍        | 1740/12538 [03:12<19:57,  9.02it/s, loss=0.307, v_num=1, val_loss_epoch=0.416, train_loss=0.276, val_loss_step=1.680]#015Epoch 2:  14%|█▍        | 1740/12538 [03:12<19:57,  9.02it/s, loss=0.36, v_num=1, val_loss_epoch=0.416, train_loss=0.408, val_loss_step=1.680] #015Epoch 2:  14%|█▍        | 1770/12538 [03:16<19:54,  9.02it/s, loss=0.36, v_num=1, val_loss_epoch=0.416, train_loss=0.408, val_loss_step=1.680]#015Epoch 2:  1

[34m7, v_num=1, val_loss_epoch=0.416, train_loss=0.264, val_loss_step=1.680]#015Epoch 2:  27%|██▋       | 3330/12538 [06:09<17:01,  9.01it/s, loss=0.336, v_num=1, val_loss_epoch=0.416, train_loss=0.292, val_loss_step=1.680]#015Epoch 2:  27%|██▋       | 3360/12538 [06:12<16:58,  9.01it/s, loss=0.336, v_num=1, val_loss_epoch=0.416, train_loss=0.292, val_loss_step=1.680]#015Epoch 2:  27%|██▋       | 3360/12538 [06:12<16:58,  9.01it/s, loss=0.313, v_num=1, val_loss_epoch=0.416, train_loss=0.285, val_loss_step=1.680]#015Epoch 2:  27%|██▋       | 3390/12538 [06:16<16:55,  9.01it/s, loss=0.313, v_num=1, val_loss_epoch=0.416, train_loss=0.285, val_loss_step=1.680]#015Epoch 2:  27%|██▋       | 3390/12538 [06:16<16:55,  9.01it/s, loss=0.323, v_num=1, val_loss_epoch=0.416, train_loss=0.329, val_loss_step=1.680]#015Epoch 2:  27%|██▋       | 3420/12538 [06:19<16:51,  9.01it/s, loss=0.323, v_num=1, val_loss_epoch=0.416, train_loss=0.329, val_loss_step=1.680]#015Epoch 2:  27%|██▋       | 3420/12538 

[34m val_loss_step=1.680]#015Epoch 2:  39%|███▉      | 4950/12538 [09:08<14:00,  9.02it/s, loss=0.317, v_num=1, val_loss_epoch=0.416, train_loss=0.298, val_loss_step=1.680]#015Epoch 2:  40%|███▉      | 4980/12538 [09:11<13:57,  9.02it/s, loss=0.317, v_num=1, val_loss_epoch=0.416, train_loss=0.298, val_loss_step=1.680]#015Epoch 2:  40%|███▉      | 4980/12538 [09:11<13:57,  9.02it/s, loss=0.344, v_num=1, val_loss_epoch=0.416, train_loss=0.477, val_loss_step=1.680]#015Epoch 2:  40%|███▉      | 5010/12538 [09:15<13:54,  9.02it/s, loss=0.344, v_num=1, val_loss_epoch=0.416, train_loss=0.477, val_loss_step=1.680]#015Epoch 2:  40%|███▉      | 5010/12538 [09:15<13:54,  9.02it/s, loss=0.32, v_num=1, val_loss_epoch=0.416, train_loss=0.279, val_loss_step=1.680] #015Epoch 2:  40%|████      | 5040/12538 [09:18<13:50,  9.02it/s, loss=0.32, v_num=1, val_loss_epoch=0.416, train_loss=0.279, val_loss_step=1.680]#015Epoch 2:  40%|████      | 5040/12538 [09:18<13:50,  9.02it/s, loss=0.337, v_num=1, val_lo

[34m4,  9.03it/s, loss=0.314, v_num=1, val_loss_epoch=0.416, train_loss=0.267, val_loss_step=1.680]#015Epoch 2:  52%|█████▏    | 6570/12538 [12:07<11:01,  9.03it/s, loss=0.314, v_num=1, val_loss_epoch=0.416, train_loss=0.267, val_loss_step=1.680]#015Epoch 2:  52%|█████▏    | 6570/12538 [12:07<11:01,  9.03it/s, loss=0.299, v_num=1, val_loss_epoch=0.416, train_loss=0.317, val_loss_step=1.680]#015Epoch 2:  53%|█████▎    | 6600/12538 [12:11<10:57,  9.03it/s, loss=0.299, v_num=1, val_loss_epoch=0.416, train_loss=0.317, val_loss_step=1.680]#015Epoch 2:  53%|█████▎    | 6600/12538 [12:11<10:57,  9.03it/s, loss=0.348, v_num=1, val_loss_epoch=0.416, train_loss=0.268, val_loss_step=1.680]#015Epoch 2:  53%|█████▎    | 6630/12538 [12:14<10:54,  9.03it/s, loss=0.348, v_num=1, val_loss_epoch=0.416, train_loss=0.268, val_loss_step=1.680]#015Epoch 2:  53%|█████▎    | 6630/12538 [12:14<10:54,  9.03it/s, loss=0.329, v_num=1, val_loss_epoch=0.416, train_loss=0.259, val_loss_step=1.680]#015Epoch 2:  53%|

[34m  65%|██████▍   | 8130/12538 [15:00<08:08,  9.03it/s, loss=0.3, v_num=1, val_loss_epoch=0.416, train_loss=0.303, val_loss_step=1.680]#015Epoch 2:  65%|██████▍   | 8130/12538 [15:00<08:08,  9.03it/s, loss=0.332, v_num=1, val_loss_epoch=0.416, train_loss=0.232, val_loss_step=1.680]#015Epoch 2:  65%|██████▌   | 8160/12538 [15:03<08:04,  9.03it/s, loss=0.332, v_num=1, val_loss_epoch=0.416, train_loss=0.232, val_loss_step=1.680]#015Epoch 2:  65%|██████▌   | 8160/12538 [15:03<08:04,  9.03it/s, loss=0.31, v_num=1, val_loss_epoch=0.416, train_loss=0.318, val_loss_step=1.680] #015Epoch 2:  65%|██████▌   | 8190/12538 [15:06<08:01,  9.03it/s, loss=0.31, v_num=1, val_loss_epoch=0.416, train_loss=0.318, val_loss_step=1.680]#015Epoch 2:  65%|██████▌   | 8190/12538 [15:06<08:01,  9.03it/s, loss=0.299, v_num=1, val_loss_epoch=0.416, train_loss=0.324, val_loss_step=1.680]#015Epoch 2:  66%|██████▌   | 8220/12538 [15:10<07:58,  9.03it/s, loss=0.299, v_num=1, val_loss_epoch=0.416, train_loss=0.324, v

[34m��█▋  | 9660/12538 [17:49<05:18,  9.03it/s, loss=0.327, v_num=1, val_loss_epoch=0.416, train_loss=0.311, val_loss_step=1.680]#015Epoch 2:  77%|███████▋  | 9690/12538 [17:52<05:15,  9.03it/s, loss=0.327, v_num=1, val_loss_epoch=0.416, train_loss=0.311, val_loss_step=1.680]#015Epoch 2:  77%|███████▋  | 9690/12538 [17:52<05:15,  9.03it/s, loss=0.351, v_num=1, val_loss_epoch=0.416, train_loss=0.252, val_loss_step=1.680]#015Epoch 2:  78%|███████▊  | 9720/12538 [17:56<05:12,  9.03it/s, loss=0.351, v_num=1, val_loss_epoch=0.416, train_loss=0.252, val_loss_step=1.680]#015Epoch 2:  78%|███████▊  | 9720/12538 [17:56<05:12,  9.03it/s, loss=0.322, v_num=1, val_loss_epoch=0.416, train_loss=0.279, val_loss_step=1.680]#015Epoch 2:  78%|███████▊  | 9750/12538 [17:59<05:08,  9.03it/s, loss=0.322, v_num=1, val_loss_epoch=0.416, train_loss=0.279, val_loss_step=1.680]#015Epoch 2:  78%|███████▊  | 9750/12538 [17:59<05:08,  9.03it/s, loss=0.329, v_num=1, val_loss_epoch=0.416, train_loss=0.451, val_loss

[34m8 [20:38<02:29,  9.04it/s, loss=0.301, v_num=1, val_loss_epoch=0.416, train_loss=0.346, val_loss_step=1.680]#015Epoch 2:  89%|████████▉ | 11190/12538 [20:38<02:29,  9.04it/s, loss=0.302, v_num=1, val_loss_epoch=0.416, train_loss=0.344, val_loss_step=1.680]#015Epoch 2:  89%|████████▉ | 11220/12538 [20:41<02:25,  9.03it/s, loss=0.302, v_num=1, val_loss_epoch=0.416, train_loss=0.344, val_loss_step=1.680]#015Epoch 2:  89%|████████▉ | 11220/12538 [20:41<02:25,  9.03it/s, loss=0.321, v_num=1, val_loss_epoch=0.416, train_loss=0.245, val_loss_step=1.680]#015Epoch 2:  90%|████████▉ | 11250/12538 [20:45<02:22,  9.03it/s, loss=0.321, v_num=1, val_loss_epoch=0.416, train_loss=0.245, val_loss_step=1.680]#015Epoch 2:  90%|████████▉ | 11250/12538 [20:45<02:22,  9.03it/s, loss=0.294, v_num=1, val_loss_epoch=0.416, train_loss=0.500, val_loss_step=1.680]#015Epoch 2:  90%|████████▉ | 11280/12538 [20:48<02:19,  9.04it/s, loss=0.294, v_num=1, val_loss_epoch=0.416, train_loss=0.500, val_loss_step=1.680

[34m#015Validating:  32%|███▏      | 210/660 [00:19<00:41, 10.86it/s]#033[A#015Epoch 2:  96%|█████████▋| 12090/12538 [22:14<00:49,  9.06it/s, loss=0.336, v_num=1, val_loss_epoch=0.416, train_loss=0.372, val_loss_step=1.680][0m
[34m#015Validating:  36%|███▋      | 240/660 [00:22<00:38, 10.85it/s]#033[A#015Epoch 2:  97%|█████████▋| 12120/12538 [22:17<00:46,  9.06it/s, loss=0.336, v_num=1, val_loss_epoch=0.416, train_loss=0.372, val_loss_step=1.680][0m
[34m#015Validating:  41%|████      | 270/660 [00:25<00:36, 10.76it/s]#033[A#015Epoch 2:  97%|█████████▋| 12150/12538 [22:20<00:42,  9.06it/s, loss=0.336, v_num=1, val_loss_epoch=0.416, train_loss=0.372, val_loss_step=1.680][0m
[34m#015Validating:  45%|████▌     | 300/660 [00:28<00:33, 10.66it/s]#033[A#015Epoch 2:  97%|█████████▋| 12180/12538 [22:23<00:39,  9.07it/s, loss=0.336, v_num=1, val_loss_epoch=0.416, train_loss=0.372, val_loss_step=1.680][0m
[34m#015Validating:  50%|█████     | 330/660 [00:31<00:31, 10.51it/s]#033[A#015Epoc

[34m0,  9.04it/s, loss=0.276, v_num=1, val_loss_epoch=0.409, train_loss=0.258, val_loss_step=1.890]#015Epoch 3:  13%|█▎        | 1680/12538 [03:05<20:00,  9.04it/s, loss=0.282, v_num=1, val_loss_epoch=0.409, train_loss=0.254, val_loss_step=1.890]#015Epoch 3:  14%|█▎        | 1710/12538 [03:09<19:57,  9.05it/s, loss=0.282, v_num=1, val_loss_epoch=0.409, train_loss=0.254, val_loss_step=1.890]#015Epoch 3:  14%|█▎        | 1710/12538 [03:09<19:57,  9.05it/s, loss=0.291, v_num=1, val_loss_epoch=0.409, train_loss=0.313, val_loss_step=1.890]#015Epoch 3:  14%|█▍        | 1740/12538 [03:12<19:53,  9.04it/s, loss=0.291, v_num=1, val_loss_epoch=0.409, train_loss=0.313, val_loss_step=1.890]#015Epoch 3:  14%|█▍        | 1740/12538 [03:12<19:53,  9.04it/s, loss=0.305, v_num=1, val_loss_epoch=0.409, train_loss=0.332, val_loss_step=1.890]#015Epoch 3:  14%|█▍        | 1770/12538 [03:15<19:50,  9.04it/s, loss=0.305, v_num=1, val_loss_epoch=0.409, train_loss=0.332, val_loss_step=1.890]#015Epoch 3:  14%|

[34m3, v_num=1, val_loss_epoch=0.409, train_loss=0.271, val_loss_step=1.890]#015Epoch 3:  27%|██▋       | 3330/12538 [06:08<16:57,  9.05it/s, loss=0.307, v_num=1, val_loss_epoch=0.409, train_loss=0.374, val_loss_step=1.890]#015Epoch 3:  27%|██▋       | 3360/12538 [06:11<16:54,  9.05it/s, loss=0.307, v_num=1, val_loss_epoch=0.409, train_loss=0.374, val_loss_step=1.890]#015Epoch 3:  27%|██▋       | 3360/12538 [06:11<16:54,  9.05it/s, loss=0.294, v_num=1, val_loss_epoch=0.409, train_loss=0.266, val_loss_step=1.890]#015Epoch 3:  27%|██▋       | 3390/12538 [06:14<16:51,  9.05it/s, loss=0.294, v_num=1, val_loss_epoch=0.409, train_loss=0.266, val_loss_step=1.890]#015Epoch 3:  27%|██▋       | 3390/12538 [06:14<16:51,  9.05it/s, loss=0.301, v_num=1, val_loss_epoch=0.409, train_loss=0.268, val_loss_step=1.890]#015Epoch 3:  27%|██▋       | 3420/12538 [06:18<16:47,  9.05it/s, loss=0.301, v_num=1, val_loss_epoch=0.409, train_loss=0.268, val_loss_step=1.890]#015Epoch 3:  27%|██▋       | 3420/12538 

[34m_loss_step=1.890]#015Epoch 3:  39%|███▉      | 4950/12538 [09:07<13:59,  9.04it/s, loss=0.286, v_num=1, val_loss_epoch=0.409, train_loss=0.289, val_loss_step=1.890]#015Epoch 3:  40%|███▉      | 4980/12538 [09:10<13:55,  9.04it/s, loss=0.286, v_num=1, val_loss_epoch=0.409, train_loss=0.289, val_loss_step=1.890]#015Epoch 3:  40%|███▉      | 4980/12538 [09:10<13:55,  9.04it/s, loss=0.34, v_num=1, val_loss_epoch=0.409, train_loss=0.337, val_loss_step=1.890] #015Epoch 3:  40%|███▉      | 5010/12538 [09:13<13:52,  9.04it/s, loss=0.34, v_num=1, val_loss_epoch=0.409, train_loss=0.337, val_loss_step=1.890]#015Epoch 3:  40%|███▉      | 5010/12538 [09:13<13:52,  9.04it/s, loss=0.276, v_num=1, val_loss_epoch=0.409, train_loss=0.256, val_loss_step=1.890]#015Epoch 3:  40%|████      | 5040/12538 [09:17<13:49,  9.04it/s, loss=0.276, v_num=1, val_loss_epoch=0.409, train_loss=0.256, val_loss_step=1.890]#015Epoch 3:  40%|████      | 5040/12538 [09:17<13:49,  9.04it/s, loss=0.302, v_num=1, val_loss_e

[34m4it/s, loss=0.33, v_num=1, val_loss_epoch=0.409, train_loss=0.424, val_loss_step=1.890] #015Epoch 3:  52%|█████▏    | 6570/12538 [12:06<11:00,  9.04it/s, loss=0.33, v_num=1, val_loss_epoch=0.409, train_loss=0.424, val_loss_step=1.890]#015Epoch 3:  52%|█████▏    | 6570/12538 [12:06<11:00,  9.04it/s, loss=0.296, v_num=1, val_loss_epoch=0.409, train_loss=0.247, val_loss_step=1.890]#015Epoch 3:  53%|█████▎    | 6600/12538 [12:09<10:56,  9.04it/s, loss=0.296, v_num=1, val_loss_epoch=0.409, train_loss=0.247, val_loss_step=1.890]#015Epoch 3:  53%|█████▎    | 6600/12538 [12:09<10:56,  9.04it/s, loss=0.288, v_num=1, val_loss_epoch=0.409, train_loss=0.287, val_loss_step=1.890]#015Epoch 3:  53%|█████▎    | 6630/12538 [12:13<10:53,  9.04it/s, loss=0.288, v_num=1, val_loss_epoch=0.409, train_loss=0.287, val_loss_step=1.890]#015Epoch 3:  53%|█████▎    | 6630/12538 [12:13<10:53,  9.04it/s, loss=0.312, v_num=1, val_loss_epoch=0.409, train_loss=0.296, val_loss_step=1.890]#015Epoch 3:  53%|█████▎  

[34m�█████▍   | 8130/12538 [14:59<08:07,  9.04it/s, loss=0.312, v_num=1, val_loss_epoch=0.409, train_loss=0.246, val_loss_step=1.890]#015Epoch 3:  65%|██████▍   | 8130/12538 [14:59<08:07,  9.04it/s, loss=0.284, v_num=1, val_loss_epoch=0.409, train_loss=0.314, val_loss_step=1.890]#015Epoch 3:  65%|██████▌   | 8160/12538 [15:02<08:04,  9.04it/s, loss=0.284, v_num=1, val_loss_epoch=0.409, train_loss=0.314, val_loss_step=1.890]#015Epoch 3:  65%|██████▌   | 8160/12538 [15:02<08:04,  9.04it/s, loss=0.321, v_num=1, val_loss_epoch=0.409, train_loss=0.228, val_loss_step=1.890]#015Epoch 3:  65%|██████▌   | 8190/12538 [15:05<08:00,  9.04it/s, loss=0.321, v_num=1, val_loss_epoch=0.409, train_loss=0.228, val_loss_step=1.890]#015Epoch 3:  65%|██████▌   | 8190/12538 [15:05<08:00,  9.04it/s, loss=0.328, v_num=1, val_loss_epoch=0.409, train_loss=0.362, val_loss_step=1.890]#015Epoch 3:  66%|██████▌   | 8220/12538 [15:09<07:57,  9.04it/s, loss=0.328, v_num=1, val_loss_epoch=0.409, train_loss=0.362, val_

[34m█▋  | 9660/12538 [17:48<05:18,  9.04it/s, loss=0.303, v_num=1, val_loss_epoch=0.409, train_loss=0.317, val_loss_step=1.890]#015Epoch 3:  77%|███████▋  | 9690/12538 [17:51<05:14,  9.04it/s, loss=0.303, v_num=1, val_loss_epoch=0.409, train_loss=0.317, val_loss_step=1.890]#015Epoch 3:  77%|███████▋  | 9690/12538 [17:51<05:14,  9.04it/s, loss=0.29, v_num=1, val_loss_epoch=0.409, train_loss=0.223, val_loss_step=1.890] #015Epoch 3:  78%|███████▊  | 9720/12538 [17:54<05:11,  9.04it/s, loss=0.29, v_num=1, val_loss_epoch=0.409, train_loss=0.223, val_loss_step=1.890]#015Epoch 3:  78%|███████▊  | 9720/12538 [17:54<05:11,  9.04it/s, loss=0.299, v_num=1, val_loss_epoch=0.409, train_loss=0.447, val_loss_step=1.890]#015Epoch 3:  78%|███████▊  | 9750/12538 [17:57<05:08,  9.05it/s, loss=0.299, v_num=1, val_loss_epoch=0.409, train_loss=0.447, val_loss_step=1.890]#015Epoch 3:  78%|███████▊  | 9750/12538 [17:57<05:08,  9.05it/s, loss=0.283, v_num=1, val_loss_epoch=0.409, train_loss=0.327, val_loss_st

[34m38 [20:37<02:29,  9.04it/s, loss=0.317, v_num=1, val_loss_epoch=0.409, train_loss=0.350, val_loss_step=1.890]#015Epoch 3:  89%|████████▉ | 11190/12538 [20:37<02:29,  9.04it/s, loss=0.303, v_num=1, val_loss_epoch=0.409, train_loss=0.294, val_loss_step=1.890]#015Epoch 3:  89%|████████▉ | 11220/12538 [20:40<02:25,  9.04it/s, loss=0.303, v_num=1, val_loss_epoch=0.409, train_loss=0.294, val_loss_step=1.890]#015Epoch 3:  89%|████████▉ | 11220/12538 [20:40<02:25,  9.04it/s, loss=0.296, v_num=1, val_loss_epoch=0.409, train_loss=0.307, val_loss_step=1.890]#015Epoch 3:  90%|████████▉ | 11250/12538 [20:43<02:22,  9.04it/s, loss=0.296, v_num=1, val_loss_epoch=0.409, train_loss=0.307, val_loss_step=1.890]#015Epoch 3:  90%|████████▉ | 11250/12538 [20:43<02:22,  9.04it/s, loss=0.286, v_num=1, val_loss_epoch=0.409, train_loss=0.277, val_loss_step=1.890]#015Epoch 3:  90%|████████▉ | 11280/12538 [20:47<02:19,  9.04it/s, loss=0.286, v_num=1, val_loss_epoch=0.409, train_loss=0.277, val_loss_step=1.89

[34m#015Validating:  32%|███▏      | 210/660 [00:19<00:41, 10.80it/s]#033[A#015Epoch 3:  96%|█████████▋| 12090/12538 [22:13<00:49,  9.07it/s, loss=0.281, v_num=1, val_loss_epoch=0.409, train_loss=0.303, val_loss_step=1.890][0m
[34m#015Validating:  36%|███▋      | 240/660 [00:22<00:38, 10.77it/s]#033[A#015Epoch 3:  97%|█████████▋| 12120/12538 [22:16<00:46,  9.07it/s, loss=0.281, v_num=1, val_loss_epoch=0.409, train_loss=0.303, val_loss_step=1.890][0m
[34m#015Validating:  41%|████      | 270/660 [00:25<00:36, 10.66it/s]#033[A#015Epoch 3:  97%|█████████▋| 12150/12538 [22:19<00:42,  9.07it/s, loss=0.281, v_num=1, val_loss_epoch=0.409, train_loss=0.303, val_loss_step=1.890][0m
[34m#015Validating:  45%|████▌     | 300/660 [00:28<00:34, 10.55it/s]#033[A#015Epoch 3:  97%|█████████▋| 12180/12538 [22:22<00:39,  9.07it/s, loss=0.281, v_num=1, val_loss_epoch=0.409, train_loss=0.303, val_loss_step=1.890][0m
[34m#015Validating:  50%|█████     | 330/660 [00:31<00:31, 10.44it/s]#033[A#015Epoc

[34m6<20:03,  9.02it/s, loss=0.268, v_num=1, val_loss_epoch=0.431, train_loss=0.287, val_loss_step=2.000]#015Epoch 4:  13%|█▎        | 1680/12538 [03:06<20:03,  9.02it/s, loss=0.274, v_num=1, val_loss_epoch=0.431, train_loss=0.325, val_loss_step=2.000]#015Epoch 4:  14%|█▎        | 1710/12538 [03:09<20:00,  9.02it/s, loss=0.274, v_num=1, val_loss_epoch=0.431, train_loss=0.325, val_loss_step=2.000]#015Epoch 4:  14%|█▎        | 1710/12538 [03:09<20:00,  9.02it/s, loss=0.274, v_num=1, val_loss_epoch=0.431, train_loss=0.386, val_loss_step=2.000]#015Epoch 4:  14%|█▍        | 1740/12538 [03:12<19:57,  9.02it/s, loss=0.274, v_num=1, val_loss_epoch=0.431, train_loss=0.386, val_loss_step=2.000]#015Epoch 4:  14%|█▍        | 1740/12538 [03:12<19:57,  9.02it/s, loss=0.264, v_num=1, val_loss_epoch=0.431, train_loss=0.245, val_loss_step=2.000]#015Epoch 4:  14%|█▍        | 1770/12538 [03:16<19:53,  9.02it/s, loss=0.264, v_num=1, val_loss_epoch=0.431, train_loss=0.245, val_loss_step=2.000]#015Epoch 4:

[34m=0.267, v_num=1, val_loss_epoch=0.431, train_loss=0.191, val_loss_step=2.000]#015Epoch 4:  27%|██▋       | 3330/12538 [06:08<17:00,  9.03it/s, loss=0.258, v_num=1, val_loss_epoch=0.431, train_loss=0.319, val_loss_step=2.000]#015Epoch 4:  27%|██▋       | 3360/12538 [06:12<16:57,  9.02it/s, loss=0.258, v_num=1, val_loss_epoch=0.431, train_loss=0.319, val_loss_step=2.000]#015Epoch 4:  27%|██▋       | 3360/12538 [06:12<16:57,  9.02it/s, loss=0.272, v_num=1, val_loss_epoch=0.431, train_loss=0.370, val_loss_step=2.000]#015Epoch 4:  27%|██▋       | 3390/12538 [06:15<16:53,  9.02it/s, loss=0.272, v_num=1, val_loss_epoch=0.431, train_loss=0.370, val_loss_step=2.000]#015Epoch 4:  27%|██▋       | 3390/12538 [06:15<16:53,  9.02it/s, loss=0.273, v_num=1, val_loss_epoch=0.431, train_loss=0.218, val_loss_step=2.000]#015Epoch 4:  27%|██▋       | 3420/12538 [06:18<16:50,  9.02it/s, loss=0.273, v_num=1, val_loss_epoch=0.431, train_loss=0.218, val_loss_step=2.000]#015Epoch 4:  27%|██▋       | 3420/1

[34m.355, val_loss_step=2.000]#015Epoch 4:  39%|███▉      | 4950/12538 [09:08<14:00,  9.03it/s, loss=0.285, v_num=1, val_loss_epoch=0.431, train_loss=0.244, val_loss_step=2.000]#015Epoch 4:  40%|███▉      | 4980/12538 [09:11<13:57,  9.03it/s, loss=0.285, v_num=1, val_loss_epoch=0.431, train_loss=0.244, val_loss_step=2.000]#015Epoch 4:  40%|███▉      | 4980/12538 [09:11<13:57,  9.03it/s, loss=0.275, v_num=1, val_loss_epoch=0.431, train_loss=0.261, val_loss_step=2.000]#015Epoch 4:  40%|███▉      | 5010/12538 [09:14<13:53,  9.03it/s, loss=0.275, v_num=1, val_loss_epoch=0.431, train_loss=0.261, val_loss_step=2.000]#015Epoch 4:  40%|███▉      | 5010/12538 [09:14<13:53,  9.03it/s, loss=0.273, v_num=1, val_loss_epoch=0.431, train_loss=0.272, val_loss_step=2.000]#015Epoch 4:  40%|████      | 5040/12538 [09:18<13:50,  9.03it/s, loss=0.273, v_num=1, val_loss_epoch=0.431, train_loss=0.272, val_loss_step=2.000]#015Epoch 4:  40%|████      | 5040/12538 [09:18<13:50,  9.03it/s, loss=0.262, v_num=1, 

[34m:04<11:04,  9.03it/s, loss=0.314, v_num=1, val_loss_epoch=0.431, train_loss=0.287, val_loss_step=2.000]#015Epoch 4:  52%|█████▏    | 6570/12538 [12:07<11:00,  9.03it/s, loss=0.314, v_num=1, val_loss_epoch=0.431, train_loss=0.287, val_loss_step=2.000]#015Epoch 4:  52%|█████▏    | 6570/12538 [12:07<11:00,  9.03it/s, loss=0.266, v_num=1, val_loss_epoch=0.431, train_loss=0.248, val_loss_step=2.000]#015Epoch 4:  53%|█████▎    | 6600/12538 [12:10<10:57,  9.03it/s, loss=0.266, v_num=1, val_loss_epoch=0.431, train_loss=0.248, val_loss_step=2.000]#015Epoch 4:  53%|█████▎    | 6600/12538 [12:10<10:57,  9.03it/s, loss=0.286, v_num=1, val_loss_epoch=0.431, train_loss=0.272, val_loss_step=2.000]#015Epoch 4:  53%|█████▎    | 6630/12538 [12:14<10:54,  9.03it/s, loss=0.286, v_num=1, val_loss_epoch=0.431, train_loss=0.272, val_loss_step=2.000]#015Epoch 4:  53%|█████▎    | 6630/12538 [12:14<10:54,  9.03it/s, loss=0.241, v_num=1, val_loss_epoch=0.431, train_loss=0.372, val_loss_step=2.000]#015Epoch 

[34m #015Epoch 4:  65%|██████▍   | 8130/12538 [15:00<08:08,  9.03it/s, loss=0.26, v_num=1, val_loss_epoch=0.431, train_loss=0.299, val_loss_step=2.000]#015Epoch 4:  65%|██████▍   | 8130/12538 [15:00<08:08,  9.03it/s, loss=0.265, v_num=1, val_loss_epoch=0.431, train_loss=0.269, val_loss_step=2.000]#015Epoch 4:  65%|██████▌   | 8160/12538 [15:04<08:05,  9.03it/s, loss=0.265, v_num=1, val_loss_epoch=0.431, train_loss=0.269, val_loss_step=2.000]#015Epoch 4:  65%|██████▌   | 8160/12538 [15:04<08:05,  9.03it/s, loss=0.238, v_num=1, val_loss_epoch=0.431, train_loss=0.281, val_loss_step=2.000]#015Epoch 4:  65%|██████▌   | 8190/12538 [15:07<08:01,  9.02it/s, loss=0.238, v_num=1, val_loss_epoch=0.431, train_loss=0.281, val_loss_step=2.000]#015Epoch 4:  65%|██████▌   | 8190/12538 [15:07<08:01,  9.02it/s, loss=0.254, v_num=1, val_loss_epoch=0.431, train_loss=0.238, val_loss_step=2.000]#015Epoch 4:  66%|██████▌   | 8220/12538 [15:10<07:58,  9.03it/s, loss=0.254, v_num=1, val_loss_epoch=0.431, trai

[34m██████▋  | 9660/12538 [17:50<05:18,  9.03it/s, loss=0.253, v_num=1, val_loss_epoch=0.431, train_loss=0.363, val_loss_step=2.000]#015Epoch 4:  77%|███████▋  | 9690/12538 [17:53<05:15,  9.03it/s, loss=0.253, v_num=1, val_loss_epoch=0.431, train_loss=0.363, val_loss_step=2.000]#015Epoch 4:  77%|███████▋  | 9690/12538 [17:53<05:15,  9.03it/s, loss=0.286, v_num=1, val_loss_epoch=0.431, train_loss=0.350, val_loss_step=2.000]#015Epoch 4:  78%|███████▊  | 9720/12538 [17:56<05:12,  9.03it/s, loss=0.286, v_num=1, val_loss_epoch=0.431, train_loss=0.350, val_loss_step=2.000]#015Epoch 4:  78%|███████▊  | 9720/12538 [17:56<05:12,  9.03it/s, loss=0.254, v_num=1, val_loss_epoch=0.431, train_loss=0.256, val_loss_step=2.000]#015Epoch 4:  78%|███████▊  | 9750/12538 [18:00<05:08,  9.03it/s, loss=0.254, v_num=1, val_loss_epoch=0.431, train_loss=0.256, val_loss_step=2.000]#015Epoch 4:  78%|███████▊  | 9750/12538 [18:00<05:08,  9.03it/s, loss=0.283, v_num=1, val_loss_epoch=0.431, train_loss=0.291, val_l

[34m�▉ | 11190/12538 [20:39<02:29,  9.03it/s, loss=0.26, v_num=1, val_loss_epoch=0.431, train_loss=0.303, val_loss_step=2.000]#015Epoch 4:  89%|████████▉ | 11190/12538 [20:39<02:29,  9.03it/s, loss=0.256, v_num=1, val_loss_epoch=0.431, train_loss=0.283, val_loss_step=2.000]#015Epoch 4:  89%|████████▉ | 11220/12538 [20:42<02:25,  9.03it/s, loss=0.256, v_num=1, val_loss_epoch=0.431, train_loss=0.283, val_loss_step=2.000]#015Epoch 4:  89%|████████▉ | 11220/12538 [20:42<02:25,  9.03it/s, loss=0.262, v_num=1, val_loss_epoch=0.431, train_loss=0.257, val_loss_step=2.000]#015Epoch 4:  90%|████████▉ | 11250/12538 [20:46<02:22,  9.03it/s, loss=0.262, v_num=1, val_loss_epoch=0.431, train_loss=0.257, val_loss_step=2.000]#015Epoch 4:  90%|████████▉ | 11250/12538 [20:46<02:22,  9.03it/s, loss=0.261, v_num=1, val_loss_epoch=0.431, train_loss=0.226, val_loss_step=2.000]#015Epoch 4:  90%|████████▉ | 11280/12538 [20:49<02:19,  9.03it/s, loss=0.261, v_num=1, val_loss_epoch=0.431, train_loss=0.226, val_l

[34m#015Validating:  41%|████      | 270/660 [00:25<00:36, 10.59it/s]#033[A#015Epoch 4:  97%|█████████▋| 12150/12538 [22:22<00:42,  9.05it/s, loss=0.282, v_num=1, val_loss_epoch=0.431, train_loss=0.203, val_loss_step=2.000][0m
[34m#015Validating:  45%|████▌     | 300/660 [00:28<00:34, 10.50it/s]#033[A#015Epoch 4:  97%|█████████▋| 12180/12538 [22:25<00:39,  9.06it/s, loss=0.282, v_num=1, val_loss_epoch=0.431, train_loss=0.203, val_loss_step=2.000][0m
[34m#015Validating:  50%|█████     | 330/660 [00:31<00:31, 10.39it/s]#033[A#015Epoch 4:  97%|█████████▋| 12210/12538 [22:28<00:36,  9.06it/s, loss=0.282, v_num=1, val_loss_epoch=0.431, train_loss=0.203, val_loss_step=2.000][0m
[34m#015Validating:  55%|█████▍    | 360/660 [00:34<00:29, 10.26it/s]#033[A#015Epoch 4:  98%|█████████▊| 12240/12538 [22:31<00:32,  9.06it/s, loss=0.282, v_num=1, val_loss_epoch=0.431, train_loss=0.203, val_loss_step=2.000][0m
[34m#015Validating:  59%|█████▉    | 390/660 [00:37<00:26, 10.26it/s]#033[A#015Epoc

[34m  21%|██▏       | 1950/9137 [03:36<13:16,  9.02it/s, loss=1.05, v_num=0, val_loss_epoch=0.634, train_loss=1.230] #015Epoch 0:  22%|██▏       | 1980/9137 [03:39<13:12,  9.03it/s, loss=1.05, v_num=0, val_loss_epoch=0.634, train_loss=1.230]#015Epoch 0:  22%|██▏       | 1980/9137 [03:39<13:12,  9.03it/s, loss=1, v_num=0, val_loss_epoch=0.634, train_loss=1.050]   #015Epoch 0:  22%|██▏       | 2010/9137 [03:42<13:09,  9.03it/s, loss=1, v_num=0, val_loss_epoch=0.634, train_loss=1.050]#015Epoch 0:  22%|██▏       | 2010/9137 [03:42<13:09,  9.03it/s, loss=1.02, v_num=0, val_loss_epoch=0.634, train_loss=1.050]#015Epoch 0:  22%|██▏       | 2040/9137 [03:45<13:06,  9.03it/s, loss=1.02, v_num=0, val_loss_epoch=0.634, train_loss=1.050]#015Epoch 0:  22%|██▏       | 2040/9137 [03:45<13:06,  9.03it/s, loss=1.09, v_num=0, val_loss_epoch=0.634, train_loss=1.270]#015Epoch 0:  23%|██▎       | 2070/9137 [03:49<13:02,  9.03it/s, loss=1.09, v_num=0, val_loss_epoch=0.634, train_loss=1.270]#015Epoch 0:  23%

[34m, loss=1, v_num=0, val_loss_epoch=0.634, train_loss=0.602]#015Epoch 0:  42%|████▏     | 3870/9137 [07:08<09:43,  9.03it/s, loss=0.926, v_num=0, val_loss_epoch=0.634, train_loss=0.717]#015Epoch 0:  43%|████▎     | 3900/9137 [07:12<09:40,  9.03it/s, loss=0.926, v_num=0, val_loss_epoch=0.634, train_loss=0.717]#015Epoch 0:  43%|████▎     | 3900/9137 [07:12<09:40,  9.03it/s, loss=0.999, v_num=0, val_loss_epoch=0.634, train_loss=0.938]#015Epoch 0:  43%|████▎     | 3930/9137 [07:15<09:36,  9.03it/s, loss=0.999, v_num=0, val_loss_epoch=0.634, train_loss=0.938]#015Epoch 0:  43%|████▎     | 3930/9137 [07:15<09:36,  9.03it/s, loss=0.908, v_num=0, val_loss_epoch=0.634, train_loss=1.000]#015Epoch 0:  43%|████▎     | 3960/9137 [07:18<09:33,  9.03it/s, loss=0.908, v_num=0, val_loss_epoch=0.634, train_loss=1.000]#015Epoch 0:  43%|████▎     | 3960/9137 [07:18<09:33,  9.03it/s, loss=0.978, v_num=0, val_loss_epoch=0.634, train_loss=1.030]#015Epoch 0:  44%|████▎     | 3990/9137 [07:21<09:29,  9.03it/

[34mum=0, val_loss_epoch=0.634, train_loss=0.895]#015Epoch 0:  63%|██████▎   | 5730/9137 [10:34<06:17,  9.04it/s, loss=0.852, v_num=0, val_loss_epoch=0.634, train_loss=0.895]#015Epoch 0:  63%|██████▎   | 5730/9137 [10:34<06:17,  9.04it/s, loss=0.96, v_num=0, val_loss_epoch=0.634, train_loss=0.554] #015Epoch 0:  63%|██████▎   | 5760/9137 [10:37<06:13,  9.04it/s, loss=0.96, v_num=0, val_loss_epoch=0.634, train_loss=0.554]#015Epoch 0:  63%|██████▎   | 5760/9137 [10:37<06:13,  9.04it/s, loss=0.971, v_num=0, val_loss_epoch=0.634, train_loss=0.798]#015Epoch 0:  63%|██████▎   | 5790/9137 [10:40<06:10,  9.04it/s, loss=0.971, v_num=0, val_loss_epoch=0.634, train_loss=0.798]#015Epoch 0:  63%|██████▎   | 5790/9137 [10:40<06:10,  9.04it/s, loss=0.851, v_num=0, val_loss_epoch=0.634, train_loss=0.578]#015Epoch 0:  64%|██████▎   | 5820/9137 [10:44<06:07,  9.04it/s, loss=0.851, v_num=0, val_loss_epoch=0.634, train_loss=0.578]#015Epoch 0:  64%|██████▎   | 5820/9137 [10:44<06:07,  9.04it/s, loss=0.914,

[34m�███▏ | 7500/9137 [13:50<03:01,  9.04it/s, loss=0.75, v_num=0, val_loss_epoch=0.634, train_loss=0.551]#015Epoch 0:  82%|████████▏ | 7530/9137 [13:53<02:57,  9.04it/s, loss=0.75, v_num=0, val_loss_epoch=0.634, train_loss=0.551]#015Epoch 0:  82%|████████▏ | 7530/9137 [13:53<02:57,  9.04it/s, loss=0.888, v_num=0, val_loss_epoch=0.634, train_loss=1.540]#015Epoch 0:  83%|████████▎ | 7560/9137 [13:56<02:54,  9.04it/s, loss=0.888, v_num=0, val_loss_epoch=0.634, train_loss=1.540]#015Epoch 0:  83%|████████▎ | 7560/9137 [13:56<02:54,  9.04it/s, loss=0.866, v_num=0, val_loss_epoch=0.634, train_loss=1.010]#015Epoch 0:  83%|████████▎ | 7590/9137 [13:59<02:51,  9.04it/s, loss=0.866, v_num=0, val_loss_epoch=0.634, train_loss=1.010]#015Epoch 0:  83%|████████▎ | 7590/9137 [13:59<02:51,  9.04it/s, loss=1.01, v_num=0, val_loss_epoch=0.634, train_loss=0.884] #015Epoch 0:  83%|████████▎ | 7620/9137 [14:03<02:47,  9.04it/s, loss=1.01, v_num=0, val_loss_epoch=0.634, train_loss=0.884]#015Epoch 0:  83%|██

[34m#015Validating:   6%|▌         | 30/483 [00:02<00:43, 10.48it/s]#033[A#015Epoch 0:  95%|█████████▌| 8700/9137 [16:01<00:48,  9.05it/s, loss=0.897, v_num=0, val_loss_epoch=0.634, train_loss=0.967][0m
[34m#015Validating:  12%|█▏        | 60/483 [00:05<00:40, 10.48it/s]#033[A#015Epoch 0:  96%|█████████▌| 8730/9137 [16:03<00:44,  9.06it/s, loss=0.897, v_num=0, val_loss_epoch=0.634, train_loss=0.967][0m
[34m#015Validating:  19%|█▊        | 90/483 [00:08<00:37, 10.46it/s]#033[A#015Epoch 0:  96%|█████████▌| 8760/9137 [16:06<00:41,  9.06it/s, loss=0.897, v_num=0, val_loss_epoch=0.634, train_loss=0.967][0m
[34m#015Validating:  25%|██▍       | 120/483 [00:11<00:34, 10.54it/s]#033[A#015Epoch 0:  96%|█████████▌| 8790/9137 [16:09<00:38,  9.07it/s, loss=0.897, v_num=0, val_loss_epoch=0.634, train_loss=0.967][0m
[34m#015Validating:  31%|███       | 150/483 [00:14<00:31, 10.73it/s]#033[A#015Epoch 0:  97%|█████████▋| 8820/9137 [16:12<00:34,  9.07it/s, loss=0.897, v_num=0, val_loss_epoch=0.

[34mpoch=0.260, train_loss=1.020, val_loss_step=0.199]#015Epoch 1:  18%|█▊        | 1680/9137 [03:06<13:46,  9.02it/s, loss=0.818, v_num=0, val_loss_epoch=0.260, train_loss=1.540, val_loss_step=0.199]#015Epoch 1:  19%|█▊        | 1710/9137 [03:09<13:43,  9.02it/s, loss=0.818, v_num=0, val_loss_epoch=0.260, train_loss=1.540, val_loss_step=0.199]#015Epoch 1:  19%|█▊        | 1710/9137 [03:09<13:43,  9.02it/s, loss=0.805, v_num=0, val_loss_epoch=0.260, train_loss=0.511, val_loss_step=0.199]#015Epoch 1:  19%|█▉        | 1740/9137 [03:12<13:40,  9.02it/s, loss=0.805, v_num=0, val_loss_epoch=0.260, train_loss=0.511, val_loss_step=0.199]#015Epoch 1:  19%|█▉        | 1740/9137 [03:12<13:40,  9.02it/s, loss=0.984, v_num=0, val_loss_epoch=0.260, train_loss=0.689, val_loss_step=0.199]#015Epoch 1:  19%|█▉        | 1770/9137 [03:16<13:36,  9.02it/s, loss=0.984, v_num=0, val_loss_epoch=0.260, train_loss=0.689, val_loss_step=0.199]#015Epoch 1:  19%|█▉        | 1770/9137 [03:16<13:36,  9.02it/s, loss

[34m.833, v_num=0, val_loss_epoch=0.260, train_loss=0.601, val_loss_step=0.199]#015Epoch 1:  36%|███▋      | 3330/9137 [06:08<10:43,  9.03it/s, loss=0.802, v_num=0, val_loss_epoch=0.260, train_loss=0.718, val_loss_step=0.199]#015Epoch 1:  37%|███▋      | 3360/9137 [06:12<10:40,  9.03it/s, loss=0.802, v_num=0, val_loss_epoch=0.260, train_loss=0.718, val_loss_step=0.199]#015Epoch 1:  37%|███▋      | 3360/9137 [06:12<10:40,  9.03it/s, loss=0.66, v_num=0, val_loss_epoch=0.260, train_loss=0.697, val_loss_step=0.199] #015Epoch 1:  37%|███▋      | 3390/9137 [06:15<10:36,  9.03it/s, loss=0.66, v_num=0, val_loss_epoch=0.260, train_loss=0.697, val_loss_step=0.199]#015Epoch 1:  37%|███▋      | 3390/9137 [06:15<10:36,  9.03it/s, loss=0.769, v_num=0, val_loss_epoch=0.260, train_loss=0.729, val_loss_step=0.199]#015Epoch 1:  37%|███▋      | 3420/9137 [06:18<10:33,  9.03it/s, loss=0.769, v_num=0, val_loss_epoch=0.260, train_loss=0.729, val_loss_step=0.199]#015Epoch 1:  37%|███▋      | 3420/9137 [06:1

[34m_loss=0.858, val_loss_step=0.199]#015Epoch 1:  54%|█████▍    | 4950/9137 [09:08<07:43,  9.03it/s, loss=0.761, v_num=0, val_loss_epoch=0.260, train_loss=0.858, val_loss_step=0.199]#015Epoch 1:  54%|█████▍    | 4950/9137 [09:08<07:43,  9.03it/s, loss=0.795, v_num=0, val_loss_epoch=0.260, train_loss=0.785, val_loss_step=0.199]#015Epoch 1:  55%|█████▍    | 4980/9137 [09:11<07:40,  9.03it/s, loss=0.795, v_num=0, val_loss_epoch=0.260, train_loss=0.785, val_loss_step=0.199]#015Epoch 1:  55%|█████▍    | 4980/9137 [09:11<07:40,  9.03it/s, loss=0.819, v_num=0, val_loss_epoch=0.260, train_loss=0.700, val_loss_step=0.199]#015Epoch 1:  55%|█████▍    | 5010/9137 [09:14<07:37,  9.03it/s, loss=0.819, v_num=0, val_loss_epoch=0.260, train_loss=0.700, val_loss_step=0.199]#015Epoch 1:  55%|█████▍    | 5010/9137 [09:14<07:37,  9.03it/s, loss=0.813, v_num=0, val_loss_epoch=0.260, train_loss=0.951, val_loss_step=0.199]#015Epoch 1:  55%|█████▌    | 5040/9137 [09:18<07:33,  9.03it/s, loss=0.813, v_num=0, 

[34m0, train_loss=0.545, val_loss_step=0.199]#015Epoch 1:  71%|███████   | 6510/9137 [12:01<04:50,  9.03it/s, loss=0.909, v_num=0, val_loss_epoch=0.260, train_loss=1.230, val_loss_step=0.199]#015Epoch 1:  72%|███████▏  | 6540/9137 [12:04<04:47,  9.03it/s, loss=0.909, v_num=0, val_loss_epoch=0.260, train_loss=1.230, val_loss_step=0.199]#015Epoch 1:  72%|███████▏  | 6540/9137 [12:04<04:47,  9.03it/s, loss=0.687, v_num=0, val_loss_epoch=0.260, train_loss=0.365, val_loss_step=0.199]#015Epoch 1:  72%|███████▏  | 6570/9137 [12:07<04:44,  9.03it/s, loss=0.687, v_num=0, val_loss_epoch=0.260, train_loss=0.365, val_loss_step=0.199]#015Epoch 1:  72%|███████▏  | 6570/9137 [12:07<04:44,  9.03it/s, loss=0.754, v_num=0, val_loss_epoch=0.260, train_loss=0.415, val_loss_step=0.199]#015Epoch 1:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.03it/s, loss=0.754, v_num=0, val_loss_epoch=0.260, train_loss=0.415, val_loss_step=0.199]#015Epoch 1:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.03it/s, loss=0.787, v

[34m137 [14:50<02:01,  9.03it/s, loss=0.78, v_num=0, val_loss_epoch=0.260, train_loss=0.666, val_loss_step=0.199] #015Epoch 1:  88%|████████▊ | 8070/9137 [14:53<01:58,  9.03it/s, loss=0.78, v_num=0, val_loss_epoch=0.260, train_loss=0.666, val_loss_step=0.199]#015Epoch 1:  88%|████████▊ | 8070/9137 [14:53<01:58,  9.03it/s, loss=0.781, v_num=0, val_loss_epoch=0.260, train_loss=0.741, val_loss_step=0.199]#015Epoch 1:  89%|████████▊ | 8100/9137 [14:57<01:54,  9.03it/s, loss=0.781, v_num=0, val_loss_epoch=0.260, train_loss=0.741, val_loss_step=0.199]#015Epoch 1:  89%|████████▊ | 8100/9137 [14:57<01:54,  9.03it/s, loss=0.719, v_num=0, val_loss_epoch=0.260, train_loss=0.896, val_loss_step=0.199]#015Epoch 1:  89%|████████▉ | 8130/9137 [15:00<01:51,  9.03it/s, loss=0.719, v_num=0, val_loss_epoch=0.260, train_loss=0.896, val_loss_step=0.199]#015Epoch 1:  89%|████████▉ | 8130/9137 [15:00<01:51,  9.03it/s, loss=0.716, v_num=0, val_loss_epoch=0.260, train_loss=0.686, val_loss_step=0.199]#015Epoch 

[34m#015Validating:  68%|██████▊   | 330/483 [00:31<00:14, 10.32it/s]#033[A#015Epoch 1:  99%|█████████▊| 9000/9137 [16:30<00:15,  9.09it/s, loss=0.793, v_num=0, val_loss_epoch=0.260, train_loss=0.613, val_loss_step=0.199][0m
[34m#015Validating:  75%|███████▍  | 360/483 [00:34<00:11, 10.40it/s]#033[A#015Epoch 1:  99%|█████████▉| 9030/9137 [16:33<00:11,  9.09it/s, loss=0.793, v_num=0, val_loss_epoch=0.260, train_loss=0.613, val_loss_step=0.199][0m
[34m#015Validating:  81%|████████  | 390/483 [00:37<00:08, 10.43it/s]#033[A#015Epoch 1:  99%|█████████▉| 9060/9137 [16:36<00:08,  9.09it/s, loss=0.793, v_num=0, val_loss_epoch=0.260, train_loss=0.613, val_loss_step=0.199][0m
[34m#015Validating:  87%|████████▋ | 420/483 [00:40<00:05, 10.57it/s]#033[A#015Epoch 1:  99%|█████████▉| 9090/9137 [16:38<00:05,  9.10it/s, loss=0.793, v_num=0, val_loss_epoch=0.260, train_loss=0.613, val_loss_step=0.199][0m
[34m#015Validating:  93%|█████████▎| 450/483 [00:42<00:03, 10.68it/s]#033[A#015Epoch 1: 100

[34m0.243, train_loss=0.512, val_loss_step=0.206]#015Epoch 2:  18%|█▊        | 1680/9137 [03:06<13:48,  9.00it/s, loss=0.772, v_num=0, val_loss_epoch=0.243, train_loss=0.486, val_loss_step=0.206]#015Epoch 2:  19%|█▊        | 1710/9137 [03:09<13:45,  9.00it/s, loss=0.772, v_num=0, val_loss_epoch=0.243, train_loss=0.486, val_loss_step=0.206]#015Epoch 2:  19%|█▊        | 1710/9137 [03:09<13:45,  9.00it/s, loss=0.662, v_num=0, val_loss_epoch=0.243, train_loss=0.759, val_loss_step=0.206]#015Epoch 2:  19%|█▉        | 1740/9137 [03:13<13:41,  9.00it/s, loss=0.662, v_num=0, val_loss_epoch=0.243, train_loss=0.759, val_loss_step=0.206]#015Epoch 2:  19%|█▉        | 1740/9137 [03:13<13:41,  9.00it/s, loss=0.776, v_num=0, val_loss_epoch=0.243, train_loss=0.481, val_loss_step=0.206]#015Epoch 2:  19%|█▉        | 1770/9137 [03:16<13:38,  9.00it/s, loss=0.776, v_num=0, val_loss_epoch=0.243, train_loss=0.481, val_loss_step=0.206]#015Epoch 2:  19%|█▉        | 1770/9137 [03:16<13:38,  9.00it/s, loss=0.67

[34m9, v_num=0, val_loss_epoch=0.243, train_loss=0.426, val_loss_step=0.206]#015Epoch 2:  36%|███▋      | 3330/9137 [06:09<10:44,  9.01it/s, loss=0.723, v_num=0, val_loss_epoch=0.243, train_loss=0.558, val_loss_step=0.206]#015Epoch 2:  37%|███▋      | 3360/9137 [06:12<10:40,  9.01it/s, loss=0.723, v_num=0, val_loss_epoch=0.243, train_loss=0.558, val_loss_step=0.206]#015Epoch 2:  37%|███▋      | 3360/9137 [06:12<10:40,  9.01it/s, loss=0.729, v_num=0, val_loss_epoch=0.243, train_loss=0.888, val_loss_step=0.206]#015Epoch 2:  37%|███▋      | 3390/9137 [06:16<10:37,  9.01it/s, loss=0.729, v_num=0, val_loss_epoch=0.243, train_loss=0.888, val_loss_step=0.206]#015Epoch 2:  37%|███▋      | 3390/9137 [06:16<10:37,  9.01it/s, loss=0.693, v_num=0, val_loss_epoch=0.243, train_loss=0.552, val_loss_step=0.206]#015Epoch 2:  37%|███▋      | 3420/9137 [06:19<10:34,  9.01it/s, loss=0.693, v_num=0, val_loss_epoch=0.243, train_loss=0.552, val_loss_step=0.206]#015Epoch 2:  37%|███▋      | 3420/9137 [06:19<

[34m.376, val_loss_step=0.206]#015Epoch 2:  54%|█████▍    | 4950/9137 [09:08<07:44,  9.02it/s, loss=0.774, v_num=0, val_loss_epoch=0.243, train_loss=0.376, val_loss_step=0.206]#015Epoch 2:  54%|█████▍    | 4950/9137 [09:08<07:44,  9.02it/s, loss=0.695, v_num=0, val_loss_epoch=0.243, train_loss=0.825, val_loss_step=0.206]#015Epoch 2:  55%|█████▍    | 4980/9137 [09:12<07:40,  9.02it/s, loss=0.695, v_num=0, val_loss_epoch=0.243, train_loss=0.825, val_loss_step=0.206]#015Epoch 2:  55%|█████▍    | 4980/9137 [09:12<07:40,  9.02it/s, loss=0.684, v_num=0, val_loss_epoch=0.243, train_loss=0.620, val_loss_step=0.206]#015Epoch 2:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.684, v_num=0, val_loss_epoch=0.243, train_loss=0.620, val_loss_step=0.206]#015Epoch 2:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.741, v_num=0, val_loss_epoch=0.243, train_loss=0.783, val_loss_step=0.206]#015Epoch 2:  55%|█████▌    | 5040/9137 [09:18<07:34,  9.02it/s, loss=0.741, v_num=0, val_los

[34min_loss=1.330, val_loss_step=0.206]#015Epoch 2:  71%|███████   | 6510/9137 [12:01<04:51,  9.02it/s, loss=0.678, v_num=0, val_loss_epoch=0.243, train_loss=0.580, val_loss_step=0.206]#015Epoch 2:  72%|███████▏  | 6540/9137 [12:05<04:47,  9.02it/s, loss=0.678, v_num=0, val_loss_epoch=0.243, train_loss=0.580, val_loss_step=0.206]#015Epoch 2:  72%|███████▏  | 6540/9137 [12:05<04:47,  9.02it/s, loss=0.695, v_num=0, val_loss_epoch=0.243, train_loss=1.080, val_loss_step=0.206]#015Epoch 2:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.02it/s, loss=0.695, v_num=0, val_loss_epoch=0.243, train_loss=1.080, val_loss_step=0.206]#015Epoch 2:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.02it/s, loss=0.694, v_num=0, val_loss_epoch=0.243, train_loss=0.634, val_loss_step=0.206]#015Epoch 2:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.02it/s, loss=0.694, v_num=0, val_loss_epoch=0.243, train_loss=0.634, val_loss_step=0.206]#015Epoch 2:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.02it/s, loss=0.765, v_num=0

[34m14:51<02:01,  9.02it/s, loss=0.665, v_num=0, val_loss_epoch=0.243, train_loss=0.648, val_loss_step=0.206]#015Epoch 2:  88%|████████▊ | 8070/9137 [14:54<01:58,  9.02it/s, loss=0.665, v_num=0, val_loss_epoch=0.243, train_loss=0.648, val_loss_step=0.206]#015Epoch 2:  88%|████████▊ | 8070/9137 [14:54<01:58,  9.02it/s, loss=0.742, v_num=0, val_loss_epoch=0.243, train_loss=0.425, val_loss_step=0.206]#015Epoch 2:  89%|████████▊ | 8100/9137 [14:58<01:55,  9.02it/s, loss=0.742, v_num=0, val_loss_epoch=0.243, train_loss=0.425, val_loss_step=0.206]#015Epoch 2:  89%|████████▊ | 8100/9137 [14:58<01:55,  9.02it/s, loss=0.697, v_num=0, val_loss_epoch=0.243, train_loss=0.562, val_loss_step=0.206]#015Epoch 2:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.697, v_num=0, val_loss_epoch=0.243, train_loss=0.562, val_loss_step=0.206]#015Epoch 2:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.72, v_num=0, val_loss_epoch=0.243, train_loss=0.602, val_loss_step=0.206] #015Epoch 2:  

[34m#015Validating:  68%|██████▊   | 330/483 [00:31<00:14, 10.35it/s]#033[A#015Epoch 2:  99%|█████████▊| 9000/9137 [16:31<00:15,  9.08it/s, loss=0.733, v_num=0, val_loss_epoch=0.243, train_loss=0.358, val_loss_step=0.206][0m
[34m#015Validating:  75%|███████▍  | 360/483 [00:34<00:11, 10.44it/s]#033[A#015Epoch 2:  99%|█████████▉| 9030/9137 [16:34<00:11,  9.08it/s, loss=0.733, v_num=0, val_loss_epoch=0.243, train_loss=0.358, val_loss_step=0.206][0m
[34m#015Validating:  81%|████████  | 390/483 [00:36<00:08, 10.46it/s]#033[A#015Epoch 2:  99%|█████████▉| 9060/9137 [16:37<00:08,  9.09it/s, loss=0.733, v_num=0, val_loss_epoch=0.243, train_loss=0.358, val_loss_step=0.206][0m
[34m#015Validating:  87%|████████▋ | 420/483 [00:39<00:05, 10.60it/s]#033[A#015Epoch 2:  99%|█████████▉| 9090/9137 [16:39<00:05,  9.09it/s, loss=0.733, v_num=0, val_loss_epoch=0.243, train_loss=0.358, val_loss_step=0.206][0m
[34m#015Validating:  93%|█████████▎| 450/483 [00:42<00:03, 10.73it/s]#033[A#015Epoch 2: 100

[34mpoch=0.240, train_loss=1.080, val_loss_step=0.210]#015Epoch 3:  18%|█▊        | 1680/9137 [03:06<13:47,  9.01it/s, loss=0.601, v_num=0, val_loss_epoch=0.240, train_loss=0.751, val_loss_step=0.210]#015Epoch 3:  19%|█▊        | 1710/9137 [03:09<13:44,  9.01it/s, loss=0.601, v_num=0, val_loss_epoch=0.240, train_loss=0.751, val_loss_step=0.210]#015Epoch 3:  19%|█▊        | 1710/9137 [03:09<13:44,  9.01it/s, loss=0.678, v_num=0, val_loss_epoch=0.240, train_loss=0.784, val_loss_step=0.210]#015Epoch 3:  19%|█▉        | 1740/9137 [03:13<13:41,  9.01it/s, loss=0.678, v_num=0, val_loss_epoch=0.240, train_loss=0.784, val_loss_step=0.210]#015Epoch 3:  19%|█▉        | 1740/9137 [03:13<13:41,  9.01it/s, loss=0.638, v_num=0, val_loss_epoch=0.240, train_loss=0.354, val_loss_step=0.210]#015Epoch 3:  19%|█▉        | 1770/9137 [03:16<13:37,  9.01it/s, loss=0.638, v_num=0, val_loss_epoch=0.240, train_loss=0.354, val_loss_step=0.210]#015Epoch 3:  19%|█▉        | 1770/9137 [03:16<13:37,  9.01it/s, loss

[34mloss=0.648, v_num=0, val_loss_epoch=0.240, train_loss=0.547, val_loss_step=0.210]#015Epoch 3:  36%|███▋      | 3330/9137 [06:09<10:44,  9.01it/s, loss=0.658, v_num=0, val_loss_epoch=0.240, train_loss=0.529, val_loss_step=0.210]#015Epoch 3:  37%|███▋      | 3360/9137 [06:12<10:41,  9.01it/s, loss=0.658, v_num=0, val_loss_epoch=0.240, train_loss=0.529, val_loss_step=0.210]#015Epoch 3:  37%|███▋      | 3360/9137 [06:12<10:41,  9.01it/s, loss=0.699, v_num=0, val_loss_epoch=0.240, train_loss=0.672, val_loss_step=0.210]#015Epoch 3:  37%|███▋      | 3390/9137 [06:16<10:37,  9.01it/s, loss=0.699, v_num=0, val_loss_epoch=0.240, train_loss=0.672, val_loss_step=0.210]#015Epoch 3:  37%|███▋      | 3390/9137 [06:16<10:37,  9.01it/s, loss=0.574, v_num=0, val_loss_epoch=0.240, train_loss=0.514, val_loss_step=0.210]#015Epoch 3:  37%|███▋      | 3420/9137 [06:19<10:34,  9.01it/s, loss=0.574, v_num=0, val_loss_epoch=0.240, train_loss=0.514, val_loss_step=0.210]#015Epoch 3:  37%|███▋      | 3420/913

[34m, train_loss=0.882, val_loss_step=0.210]#015Epoch 3:  54%|█████▍    | 4950/9137 [09:08<07:44,  9.02it/s, loss=0.692, v_num=0, val_loss_epoch=0.240, train_loss=0.882, val_loss_step=0.210]#015Epoch 3:  54%|█████▍    | 4950/9137 [09:08<07:44,  9.02it/s, loss=0.593, v_num=0, val_loss_epoch=0.240, train_loss=0.858, val_loss_step=0.210]#015Epoch 3:  55%|█████▍    | 4980/9137 [09:12<07:40,  9.02it/s, loss=0.593, v_num=0, val_loss_epoch=0.240, train_loss=0.858, val_loss_step=0.210]#015Epoch 3:  55%|█████▍    | 4980/9137 [09:12<07:40,  9.02it/s, loss=0.65, v_num=0, val_loss_epoch=0.240, train_loss=0.821, val_loss_step=0.210] #015Epoch 3:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.65, v_num=0, val_loss_epoch=0.240, train_loss=0.821, val_loss_step=0.210]#015Epoch 3:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.764, v_num=0, val_loss_epoch=0.240, train_loss=0.699, val_loss_step=0.210]#015Epoch 3:  55%|█████▌    | 5040/9137 [09:18<07:34,  9.02it/s, loss=0.764, v_n

[34mh=0.240, train_loss=0.573, val_loss_step=0.210]#015Epoch 3:  71%|███████   | 6510/9137 [12:01<04:51,  9.02it/s, loss=0.631, v_num=0, val_loss_epoch=0.240, train_loss=0.430, val_loss_step=0.210]#015Epoch 3:  72%|███████▏  | 6540/9137 [12:04<04:47,  9.02it/s, loss=0.631, v_num=0, val_loss_epoch=0.240, train_loss=0.430, val_loss_step=0.210]#015Epoch 3:  72%|███████▏  | 6540/9137 [12:04<04:47,  9.02it/s, loss=0.597, v_num=0, val_loss_epoch=0.240, train_loss=0.355, val_loss_step=0.210]#015Epoch 3:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.02it/s, loss=0.597, v_num=0, val_loss_epoch=0.240, train_loss=0.355, val_loss_step=0.210]#015Epoch 3:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.02it/s, loss=0.615, v_num=0, val_loss_epoch=0.240, train_loss=0.600, val_loss_step=0.210]#015Epoch 3:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.02it/s, loss=0.615, v_num=0, val_loss_epoch=0.240, train_loss=0.600, val_loss_step=0.210]#015Epoch 3:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.02it/s, loss=0.

[34m0/9137 [14:51<02:01,  9.02it/s, loss=0.59, v_num=0, val_loss_epoch=0.240, train_loss=0.432, val_loss_step=0.210] #015Epoch 3:  88%|████████▊ | 8070/9137 [14:54<01:58,  9.02it/s, loss=0.59, v_num=0, val_loss_epoch=0.240, train_loss=0.432, val_loss_step=0.210]#015Epoch 3:  88%|████████▊ | 8070/9137 [14:54<01:58,  9.02it/s, loss=0.528, v_num=0, val_loss_epoch=0.240, train_loss=0.566, val_loss_step=0.210]#015Epoch 3:  89%|████████▊ | 8100/9137 [14:57<01:54,  9.02it/s, loss=0.528, v_num=0, val_loss_epoch=0.240, train_loss=0.566, val_loss_step=0.210]#015Epoch 3:  89%|████████▊ | 8100/9137 [14:57<01:54,  9.02it/s, loss=0.572, v_num=0, val_loss_epoch=0.240, train_loss=0.247, val_loss_step=0.210]#015Epoch 3:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.572, v_num=0, val_loss_epoch=0.240, train_loss=0.247, val_loss_step=0.210]#015Epoch 3:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.678, v_num=0, val_loss_epoch=0.240, train_loss=1.710, val_loss_step=0.210]#015Epo

[34m#015Validating:  68%|██████▊   | 330/483 [00:31<00:14, 10.29it/s]#033[A#015Epoch 3:  99%|█████████▊| 9000/9137 [16:31<00:15,  9.08it/s, loss=0.569, v_num=0, val_loss_epoch=0.240, train_loss=0.482, val_loss_step=0.210][0m
[34m#015Validating:  75%|███████▍  | 360/483 [00:34<00:11, 10.36it/s]#033[A#015Epoch 3:  99%|█████████▉| 9030/9137 [16:33<00:11,  9.09it/s, loss=0.569, v_num=0, val_loss_epoch=0.240, train_loss=0.482, val_loss_step=0.210][0m
[34m#015Validating:  81%|████████  | 390/483 [00:37<00:09, 10.32it/s]#033[A#015Epoch 3:  99%|█████████▉| 9060/9137 [16:36<00:08,  9.09it/s, loss=0.569, v_num=0, val_loss_epoch=0.240, train_loss=0.482, val_loss_step=0.210][0m
[34m#015Validating:  87%|████████▋ | 420/483 [00:40<00:06, 10.49it/s]#033[A#015Epoch 3:  99%|█████████▉| 9090/9137 [16:39<00:05,  9.09it/s, loss=0.569, v_num=0, val_loss_epoch=0.240, train_loss=0.482, val_loss_step=0.210][0m
[34m#015Validating:  93%|█████████▎| 450/483 [00:42<00:03, 10.64it/s]#033[A#015Epoch 3: 100

[34m�█▏       | 1950/9137 [03:36<13:17,  9.01it/s, loss=1.14, v_num=1, val_loss_epoch=0.738, train_loss=1.430]#015Epoch 0:  22%|██▏       | 1980/9137 [03:39<13:13,  9.02it/s, loss=1.14, v_num=1, val_loss_epoch=0.738, train_loss=1.430]#015Epoch 0:  22%|██▏       | 1980/9137 [03:39<13:13,  9.02it/s, loss=1.09, v_num=1, val_loss_epoch=0.738, train_loss=1.220]#015Epoch 0:  22%|██▏       | 2010/9137 [03:42<13:10,  9.02it/s, loss=1.09, v_num=1, val_loss_epoch=0.738, train_loss=1.220]#015Epoch 0:  22%|██▏       | 2010/9137 [03:42<13:10,  9.02it/s, loss=1.06, v_num=1, val_loss_epoch=0.738, train_loss=1.010]#015Epoch 0:  22%|██▏       | 2040/9137 [03:46<13:06,  9.02it/s, loss=1.06, v_num=1, val_loss_epoch=0.738, train_loss=1.010]#015Epoch 0:  22%|██▏       | 2040/9137 [03:46<13:06,  9.02it/s, loss=0.989, v_num=1, val_loss_epoch=0.738, train_loss=0.807]#015Epoch 0:  23%|██▎       | 2070/9137 [03:49<13:03,  9.02it/s, loss=0.989, v_num=1, val_loss_epoch=0.738, train_loss=0.807]#015Epoch 0:  23%|█

[34m, val_loss_epoch=0.738, train_loss=0.950]#015Epoch 0:  42%|████▏     | 3870/9137 [07:08<09:43,  9.03it/s, loss=1.01, v_num=1, val_loss_epoch=0.738, train_loss=0.704]#015Epoch 0:  43%|████▎     | 3900/9137 [07:12<09:40,  9.03it/s, loss=1.01, v_num=1, val_loss_epoch=0.738, train_loss=0.704]#015Epoch 0:  43%|████▎     | 3900/9137 [07:12<09:40,  9.03it/s, loss=1.04, v_num=1, val_loss_epoch=0.738, train_loss=1.010]#015Epoch 0:  43%|████▎     | 3930/9137 [07:15<09:36,  9.03it/s, loss=1.04, v_num=1, val_loss_epoch=0.738, train_loss=1.010]#015Epoch 0:  43%|████▎     | 3930/9137 [07:15<09:36,  9.03it/s, loss=0.913, v_num=1, val_loss_epoch=0.738, train_loss=1.390]#015Epoch 0:  43%|████▎     | 3960/9137 [07:18<09:33,  9.02it/s, loss=0.913, v_num=1, val_loss_epoch=0.738, train_loss=1.390]#015Epoch 0:  43%|████▎     | 3960/9137 [07:18<09:33,  9.02it/s, loss=0.915, v_num=1, val_loss_epoch=0.738, train_loss=0.798]#015Epoch 0:  44%|████▎     | 3990/9137 [07:22<09:30,  9.03it/s, loss=0.915, v_num=

[34mtrain_loss=0.735]#015Epoch 0:  63%|██████▎   | 5730/9137 [10:34<06:17,  9.03it/s, loss=0.988, v_num=1, val_loss_epoch=0.738, train_loss=0.735]#015Epoch 0:  63%|██████▎   | 5730/9137 [10:34<06:17,  9.03it/s, loss=0.978, v_num=1, val_loss_epoch=0.738, train_loss=1.160]#015Epoch 0:  63%|██████▎   | 5760/9137 [10:37<06:13,  9.03it/s, loss=0.978, v_num=1, val_loss_epoch=0.738, train_loss=1.160]#015Epoch 0:  63%|██████▎   | 5760/9137 [10:37<06:13,  9.03it/s, loss=0.973, v_num=1, val_loss_epoch=0.738, train_loss=1.200]#015Epoch 0:  63%|██████▎   | 5790/9137 [10:41<06:10,  9.03it/s, loss=0.973, v_num=1, val_loss_epoch=0.738, train_loss=1.200]#015Epoch 0:  63%|██████▎   | 5790/9137 [10:41<06:10,  9.03it/s, loss=0.897, v_num=1, val_loss_epoch=0.738, train_loss=0.798]#015Epoch 0:  64%|██████▎   | 5820/9137 [10:44<06:07,  9.03it/s, loss=0.897, v_num=1, val_loss_epoch=0.738, train_loss=0.798]#015Epoch 0:  64%|██████▎   | 5820/9137 [10:44<06:07,  9.03it/s, loss=0.99, v_num=1, val_loss_epoch=0.7

[34m0/9137 [13:51<03:01,  9.02it/s, loss=0.831, v_num=1, val_loss_epoch=0.738, train_loss=1.220]#015Epoch 0:  82%|████████▏ | 7530/9137 [13:54<02:58,  9.03it/s, loss=0.831, v_num=1, val_loss_epoch=0.738, train_loss=1.220]#015Epoch 0:  82%|████████▏ | 7530/9137 [13:54<02:58,  9.03it/s, loss=0.871, v_num=1, val_loss_epoch=0.738, train_loss=1.120]#015Epoch 0:  83%|████████▎ | 7560/9137 [13:57<02:54,  9.02it/s, loss=0.871, v_num=1, val_loss_epoch=0.738, train_loss=1.120]#015Epoch 0:  83%|████████▎ | 7560/9137 [13:57<02:54,  9.02it/s, loss=0.951, v_num=1, val_loss_epoch=0.738, train_loss=0.851]#015Epoch 0:  83%|████████▎ | 7590/9137 [14:00<02:51,  9.03it/s, loss=0.951, v_num=1, val_loss_epoch=0.738, train_loss=0.851]#015Epoch 0:  83%|████████▎ | 7590/9137 [14:00<02:51,  9.03it/s, loss=0.898, v_num=1, val_loss_epoch=0.738, train_loss=0.919]#015Epoch 0:  83%|████████▎ | 7620/9137 [14:04<02:48,  9.03it/s, loss=0.898, v_num=1, val_loss_epoch=0.738, train_loss=0.919]#015Epoch 0:  83%|████████▎ 

[34m#015Validating:   6%|▌         | 30/483 [00:02<00:43, 10.40it/s]#033[A#015Epoch 0:  95%|█████████▌| 8700/9137 [16:02<00:48,  9.04it/s, loss=0.839, v_num=1, val_loss_epoch=0.738, train_loss=0.459][0m
[34m#015Validating:  12%|█▏        | 60/483 [00:05<00:40, 10.44it/s]#033[A#015Epoch 0:  96%|█████████▌| 8730/9137 [16:05<00:44,  9.05it/s, loss=0.839, v_num=1, val_loss_epoch=0.738, train_loss=0.459][0m
[34m#015Validating:  19%|█▊        | 90/483 [00:08<00:37, 10.37it/s]#033[A#015Epoch 0:  96%|█████████▌| 8760/9137 [16:08<00:41,  9.05it/s, loss=0.839, v_num=1, val_loss_epoch=0.738, train_loss=0.459][0m
[34m#015Validating:  25%|██▍       | 120/483 [00:11<00:34, 10.40it/s]#033[A#015Epoch 0:  96%|█████████▌| 8790/9137 [16:10<00:38,  9.05it/s, loss=0.839, v_num=1, val_loss_epoch=0.738, train_loss=0.459][0m
[34m#015Validating:  31%|███       | 150/483 [00:14<00:31, 10.49it/s]#033[A#015Epoch 0:  97%|█████████▋| 8820/9137 [16:13<00:34,  9.06it/s, loss=0.839, v_num=1, val_loss_epoch=0.

[34m.277, train_loss=0.845, val_loss_step=0.173]#015Epoch 1:  18%|█▊        | 1680/9137 [03:06<13:48,  9.00it/s, loss=0.804, v_num=1, val_loss_epoch=0.277, train_loss=0.512, val_loss_step=0.173]#015Epoch 1:  19%|█▊        | 1710/9137 [03:10<13:45,  9.00it/s, loss=0.804, v_num=1, val_loss_epoch=0.277, train_loss=0.512, val_loss_step=0.173]#015Epoch 1:  19%|█▊        | 1710/9137 [03:10<13:45,  9.00it/s, loss=0.812, v_num=1, val_loss_epoch=0.277, train_loss=1.020, val_loss_step=0.173]#015Epoch 1:  19%|█▉        | 1740/9137 [03:13<13:41,  9.00it/s, loss=0.812, v_num=1, val_loss_epoch=0.277, train_loss=1.020, val_loss_step=0.173]#015Epoch 1:  19%|█▉        | 1740/9137 [03:13<13:41,  9.00it/s, loss=0.857, v_num=1, val_loss_epoch=0.277, train_loss=0.721, val_loss_step=0.173]#015Epoch 1:  19%|█▉        | 1770/9137 [03:16<13:38,  9.00it/s, loss=0.857, v_num=1, val_loss_epoch=0.277, train_loss=0.721, val_loss_step=0.173]#015Epoch 1:  19%|█▉        | 1770/9137 [03:16<13:38,  9.00it/s, loss=0.795

[34mv_num=1, val_loss_epoch=0.277, train_loss=0.970, val_loss_step=0.173]#015Epoch 1:  36%|███▋      | 3330/9137 [06:09<10:43,  9.02it/s, loss=0.778, v_num=1, val_loss_epoch=0.277, train_loss=0.839, val_loss_step=0.173]#015Epoch 1:  37%|███▋      | 3360/9137 [06:12<10:40,  9.02it/s, loss=0.778, v_num=1, val_loss_epoch=0.277, train_loss=0.839, val_loss_step=0.173]#015Epoch 1:  37%|███▋      | 3360/9137 [06:12<10:40,  9.02it/s, loss=0.713, v_num=1, val_loss_epoch=0.277, train_loss=0.939, val_loss_step=0.173]#015Epoch 1:  37%|███▋      | 3390/9137 [06:15<10:37,  9.02it/s, loss=0.713, v_num=1, val_loss_epoch=0.277, train_loss=0.939, val_loss_step=0.173]#015Epoch 1:  37%|███▋      | 3390/9137 [06:15<10:37,  9.02it/s, loss=0.829, v_num=1, val_loss_epoch=0.277, train_loss=1.330, val_loss_step=0.173]#015Epoch 1:  37%|███▋      | 3420/9137 [06:19<10:33,  9.02it/s, loss=0.829, v_num=1, val_loss_epoch=0.277, train_loss=1.330, val_loss_step=0.173]#015Epoch 1:  37%|███▋      | 3420/9137 [06:19<10:

[34m, val_loss_step=0.173]#015Epoch 1:  54%|█████▍    | 4950/9137 [09:08<07:44,  9.02it/s, loss=0.873, v_num=1, val_loss_epoch=0.277, train_loss=1.080, val_loss_step=0.173]#015Epoch 1:  54%|█████▍    | 4950/9137 [09:08<07:44,  9.02it/s, loss=0.859, v_num=1, val_loss_epoch=0.277, train_loss=0.921, val_loss_step=0.173]#015Epoch 1:  55%|█████▍    | 4980/9137 [09:12<07:40,  9.02it/s, loss=0.859, v_num=1, val_loss_epoch=0.277, train_loss=0.921, val_loss_step=0.173]#015Epoch 1:  55%|█████▍    | 4980/9137 [09:12<07:40,  9.02it/s, loss=0.736, v_num=1, val_loss_epoch=0.277, train_loss=0.554, val_loss_step=0.173]#015Epoch 1:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.736, v_num=1, val_loss_epoch=0.277, train_loss=0.554, val_loss_step=0.173]#015Epoch 1:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.842, v_num=1, val_loss_epoch=0.277, train_loss=0.804, val_loss_step=0.173]#015Epoch 1:  55%|█████▌    | 5040/9137 [09:18<07:34,  9.02it/s, loss=0.842, v_num=1, val_loss_ep

[34mss=0.572, val_loss_step=0.173]#015Epoch 1:  71%|███████   | 6510/9137 [12:01<04:51,  9.02it/s, loss=0.765, v_num=1, val_loss_epoch=0.277, train_loss=1.040, val_loss_step=0.173]#015Epoch 1:  72%|███████▏  | 6540/9137 [12:05<04:47,  9.02it/s, loss=0.765, v_num=1, val_loss_epoch=0.277, train_loss=1.040, val_loss_step=0.173]#015Epoch 1:  72%|███████▏  | 6540/9137 [12:05<04:47,  9.02it/s, loss=0.785, v_num=1, val_loss_epoch=0.277, train_loss=0.748, val_loss_step=0.173]#015Epoch 1:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.02it/s, loss=0.785, v_num=1, val_loss_epoch=0.277, train_loss=0.748, val_loss_step=0.173]#015Epoch 1:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.02it/s, loss=0.865, v_num=1, val_loss_epoch=0.277, train_loss=1.270, val_loss_step=0.173]#015Epoch 1:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.02it/s, loss=0.865, v_num=1, val_loss_epoch=0.277, train_loss=1.270, val_loss_step=0.173]#015Epoch 1:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.02it/s, loss=0.834, v_num=1, val

[34m02:01,  9.02it/s, loss=0.77, v_num=1, val_loss_epoch=0.277, train_loss=0.399, val_loss_step=0.173] #015Epoch 1:  88%|████████▊ | 8070/9137 [14:54<01:58,  9.02it/s, loss=0.77, v_num=1, val_loss_epoch=0.277, train_loss=0.399, val_loss_step=0.173]#015Epoch 1:  88%|████████▊ | 8070/9137 [14:54<01:58,  9.02it/s, loss=0.845, v_num=1, val_loss_epoch=0.277, train_loss=1.050, val_loss_step=0.173]#015Epoch 1:  89%|████████▊ | 8100/9137 [14:58<01:54,  9.02it/s, loss=0.845, v_num=1, val_loss_epoch=0.277, train_loss=1.050, val_loss_step=0.173]#015Epoch 1:  89%|████████▊ | 8100/9137 [14:58<01:54,  9.02it/s, loss=0.82, v_num=1, val_loss_epoch=0.277, train_loss=0.777, val_loss_step=0.173] #015Epoch 1:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.82, v_num=1, val_loss_epoch=0.277, train_loss=0.777, val_loss_step=0.173]#015Epoch 1:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.687, v_num=1, val_loss_epoch=0.277, train_loss=1.210, val_loss_step=0.173]#015Epoch 1:  89%|████

[34m#015Validating:  68%|██████▊   | 330/483 [00:31<00:14, 10.25it/s]#033[A#015Epoch 1:  99%|█████████▊| 9000/9137 [16:31<00:15,  9.07it/s, loss=0.724, v_num=1, val_loss_epoch=0.277, train_loss=1.160, val_loss_step=0.173][0m
[34m#015Validating:  75%|███████▍  | 360/483 [00:34<00:11, 10.38it/s]#033[A#015Epoch 1:  99%|█████████▉| 9030/9137 [16:34<00:11,  9.08it/s, loss=0.724, v_num=1, val_loss_epoch=0.277, train_loss=1.160, val_loss_step=0.173][0m
[34m#015Validating:  81%|████████  | 390/483 [00:37<00:08, 10.41it/s]#033[A#015Epoch 1:  99%|█████████▉| 9060/9137 [16:37<00:08,  9.08it/s, loss=0.724, v_num=1, val_loss_epoch=0.277, train_loss=1.160, val_loss_step=0.173][0m
[34m#015Validating:  87%|████████▋ | 420/483 [00:39<00:05, 10.55it/s]#033[A#015Epoch 1:  99%|█████████▉| 9090/9137 [16:40<00:05,  9.09it/s, loss=0.724, v_num=1, val_loss_epoch=0.277, train_loss=1.160, val_loss_step=0.173][0m
[34m#015Validating:  93%|█████████▎| 450/483 [00:42<00:03, 10.67it/s]#033[A#015Epoch 1: 100

[34moch=0.250, train_loss=0.562, val_loss_step=0.164]#015Epoch 2:  18%|█▊        | 1680/9137 [03:06<13:47,  9.01it/s, loss=0.708, v_num=1, val_loss_epoch=0.250, train_loss=0.928, val_loss_step=0.164]#015Epoch 2:  19%|█▊        | 1710/9137 [03:09<13:44,  9.01it/s, loss=0.708, v_num=1, val_loss_epoch=0.250, train_loss=0.928, val_loss_step=0.164]#015Epoch 2:  19%|█▊        | 1710/9137 [03:09<13:44,  9.01it/s, loss=0.7, v_num=1, val_loss_epoch=0.250, train_loss=0.841, val_loss_step=0.164]  #015Epoch 2:  19%|█▉        | 1740/9137 [03:13<13:40,  9.01it/s, loss=0.7, v_num=1, val_loss_epoch=0.250, train_loss=0.841, val_loss_step=0.164]#015Epoch 2:  19%|█▉        | 1740/9137 [03:13<13:40,  9.01it/s, loss=0.714, v_num=1, val_loss_epoch=0.250, train_loss=0.756, val_loss_step=0.164]#015Epoch 2:  19%|█▉        | 1770/9137 [03:16<13:37,  9.01it/s, loss=0.714, v_num=1, val_loss_epoch=0.250, train_loss=0.756, val_loss_step=0.164]#015Epoch 2:  19%|█▉        | 1770/9137 [03:16<13:37,  9.01it/s, loss=0.

[34m666, v_num=1, val_loss_epoch=0.250, train_loss=0.646, val_loss_step=0.164]#015Epoch 2:  36%|███▋      | 3330/9137 [06:09<10:43,  9.02it/s, loss=0.676, v_num=1, val_loss_epoch=0.250, train_loss=0.669, val_loss_step=0.164]#015Epoch 2:  37%|███▋      | 3360/9137 [06:12<10:40,  9.02it/s, loss=0.676, v_num=1, val_loss_epoch=0.250, train_loss=0.669, val_loss_step=0.164]#015Epoch 2:  37%|███▋      | 3360/9137 [06:12<10:40,  9.02it/s, loss=0.667, v_num=1, val_loss_epoch=0.250, train_loss=0.663, val_loss_step=0.164]#015Epoch 2:  37%|███▋      | 3390/9137 [06:15<10:37,  9.02it/s, loss=0.667, v_num=1, val_loss_epoch=0.250, train_loss=0.663, val_loss_step=0.164]#015Epoch 2:  37%|███▋      | 3390/9137 [06:15<10:37,  9.02it/s, loss=0.71, v_num=1, val_loss_epoch=0.250, train_loss=1.120, val_loss_step=0.164] #015Epoch 2:  37%|███▋      | 3420/9137 [06:19<10:33,  9.02it/s, loss=0.71, v_num=1, val_loss_epoch=0.250, train_loss=1.120, val_loss_step=0.164]#015Epoch 2:  37%|███▋      | 3420/9137 [06:19

[34mss=1.080, val_loss_step=0.164]#015Epoch 2:  54%|█████▍    | 4950/9137 [09:09<07:44,  9.02it/s, loss=0.714, v_num=1, val_loss_epoch=0.250, train_loss=1.080, val_loss_step=0.164]#015Epoch 2:  54%|█████▍    | 4950/9137 [09:09<07:44,  9.02it/s, loss=0.701, v_num=1, val_loss_epoch=0.250, train_loss=0.487, val_loss_step=0.164]#015Epoch 2:  55%|█████▍    | 4980/9137 [09:12<07:41,  9.02it/s, loss=0.701, v_num=1, val_loss_epoch=0.250, train_loss=0.487, val_loss_step=0.164]#015Epoch 2:  55%|█████▍    | 4980/9137 [09:12<07:41,  9.02it/s, loss=0.772, v_num=1, val_loss_epoch=0.250, train_loss=0.572, val_loss_step=0.164]#015Epoch 2:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.772, v_num=1, val_loss_epoch=0.250, train_loss=0.572, val_loss_step=0.164]#015Epoch 2:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.731, v_num=1, val_loss_epoch=0.250, train_loss=0.429, val_loss_step=0.164]#015Epoch 2:  55%|█████▌    | 5040/9137 [09:18<07:34,  9.02it/s, loss=0.731, v_num=1, val

[34m train_loss=0.735, val_loss_step=0.164]#015Epoch 2:  71%|███████   | 6510/9137 [12:02<04:51,  9.01it/s, loss=0.676, v_num=1, val_loss_epoch=0.250, train_loss=0.665, val_loss_step=0.164]#015Epoch 2:  72%|███████▏  | 6540/9137 [12:05<04:48,  9.01it/s, loss=0.676, v_num=1, val_loss_epoch=0.250, train_loss=0.665, val_loss_step=0.164]#015Epoch 2:  72%|███████▏  | 6540/9137 [12:05<04:48,  9.01it/s, loss=0.764, v_num=1, val_loss_epoch=0.250, train_loss=0.543, val_loss_step=0.164]#015Epoch 2:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.01it/s, loss=0.764, v_num=1, val_loss_epoch=0.250, train_loss=0.543, val_loss_step=0.164]#015Epoch 2:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.01it/s, loss=0.731, v_num=1, val_loss_epoch=0.250, train_loss=0.640, val_loss_step=0.164]#015Epoch 2:  72%|███████▏  | 6600/9137 [12:12<04:41,  9.01it/s, loss=0.731, v_num=1, val_loss_epoch=0.250, train_loss=0.640, val_loss_step=0.164]#015Epoch 2:  72%|███████▏  | 6600/9137 [12:12<04:41,  9.01it/s, loss=0.718, v_n

[34m7 [14:51<02:01,  9.02it/s, loss=0.696, v_num=1, val_loss_epoch=0.250, train_loss=0.750, val_loss_step=0.164]#015Epoch 2:  88%|████████▊ | 8070/9137 [14:54<01:58,  9.02it/s, loss=0.696, v_num=1, val_loss_epoch=0.250, train_loss=0.750, val_loss_step=0.164]#015Epoch 2:  88%|████████▊ | 8070/9137 [14:55<01:58,  9.02it/s, loss=0.701, v_num=1, val_loss_epoch=0.250, train_loss=0.592, val_loss_step=0.164]#015Epoch 2:  89%|████████▊ | 8100/9137 [14:58<01:55,  9.02it/s, loss=0.701, v_num=1, val_loss_epoch=0.250, train_loss=0.592, val_loss_step=0.164]#015Epoch 2:  89%|████████▊ | 8100/9137 [14:58<01:55,  9.02it/s, loss=0.751, v_num=1, val_loss_epoch=0.250, train_loss=0.721, val_loss_step=0.164]#015Epoch 2:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.751, v_num=1, val_loss_epoch=0.250, train_loss=0.721, val_loss_step=0.164]#015Epoch 2:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.665, v_num=1, val_loss_epoch=0.250, train_loss=1.040, val_loss_step=0.164]#015Epoch 2

[34m#015Validating:  68%|██████▊   | 330/483 [00:31<00:14, 10.27it/s]#033[A#015Epoch 2:  99%|█████████▊| 9000/9137 [16:32<00:15,  9.07it/s, loss=0.652, v_num=1, val_loss_epoch=0.250, train_loss=0.921, val_loss_step=0.164][0m
[34m#015Validating:  75%|███████▍  | 360/483 [00:34<00:11, 10.35it/s]#033[A#015Epoch 2:  99%|█████████▉| 9030/9137 [16:34<00:11,  9.08it/s, loss=0.652, v_num=1, val_loss_epoch=0.250, train_loss=0.921, val_loss_step=0.164][0m
[34m#015Validating:  81%|████████  | 390/483 [00:37<00:08, 10.35it/s]#033[A#015Epoch 2:  99%|█████████▉| 9060/9137 [16:37<00:08,  9.08it/s, loss=0.652, v_num=1, val_loss_epoch=0.250, train_loss=0.921, val_loss_step=0.164][0m
[34m#015Validating:  87%|████████▋ | 420/483 [00:40<00:06, 10.50it/s]#033[A#015Epoch 2:  99%|█████████▉| 9090/9137 [16:40<00:05,  9.08it/s, loss=0.652, v_num=1, val_loss_epoch=0.250, train_loss=0.921, val_loss_step=0.164][0m
[34m#015Validating:  93%|█████████▎| 450/483 [00:42<00:03, 10.63it/s]#033[A#015Epoch 2: 100

[34m=0.245, train_loss=0.924, val_loss_step=0.162]#015Epoch 3:  18%|█▊        | 1680/9137 [03:06<13:48,  9.00it/s, loss=0.629, v_num=1, val_loss_epoch=0.245, train_loss=0.370, val_loss_step=0.162]#015Epoch 3:  19%|█▊        | 1710/9137 [03:09<13:45,  9.00it/s, loss=0.629, v_num=1, val_loss_epoch=0.245, train_loss=0.370, val_loss_step=0.162]#015Epoch 3:  19%|█▊        | 1710/9137 [03:09<13:45,  9.00it/s, loss=0.673, v_num=1, val_loss_epoch=0.245, train_loss=0.836, val_loss_step=0.162]#015Epoch 3:  19%|█▉        | 1740/9137 [03:13<13:41,  9.00it/s, loss=0.673, v_num=1, val_loss_epoch=0.245, train_loss=0.836, val_loss_step=0.162]#015Epoch 3:  19%|█▉        | 1740/9137 [03:13<13:41,  9.00it/s, loss=0.609, v_num=1, val_loss_epoch=0.245, train_loss=0.693, val_loss_step=0.162]#015Epoch 3:  19%|█▉        | 1770/9137 [03:16<13:38,  9.00it/s, loss=0.609, v_num=1, val_loss_epoch=0.245, train_loss=0.693, val_loss_step=0.162]#015Epoch 3:  19%|█▉        | 1770/9137 [03:16<13:38,  9.00it/s, loss=0.6

[34m.678, v_num=1, val_loss_epoch=0.245, train_loss=0.772, val_loss_step=0.162]#015Epoch 3:  36%|███▋      | 3330/9137 [06:09<10:44,  9.01it/s, loss=0.607, v_num=1, val_loss_epoch=0.245, train_loss=0.765, val_loss_step=0.162]#015Epoch 3:  37%|███▋      | 3360/9137 [06:13<10:41,  9.00it/s, loss=0.607, v_num=1, val_loss_epoch=0.245, train_loss=0.765, val_loss_step=0.162]#015Epoch 3:  37%|███▋      | 3360/9137 [06:13<10:41,  9.00it/s, loss=0.621, v_num=1, val_loss_epoch=0.245, train_loss=0.655, val_loss_step=0.162]#015Epoch 3:  37%|███▋      | 3390/9137 [06:16<10:38,  9.00it/s, loss=0.621, v_num=1, val_loss_epoch=0.245, train_loss=0.655, val_loss_step=0.162]#015Epoch 3:  37%|███▋      | 3390/9137 [06:16<10:38,  9.00it/s, loss=0.682, v_num=1, val_loss_epoch=0.245, train_loss=0.676, val_loss_step=0.162]#015Epoch 3:  37%|███▋      | 3420/9137 [06:19<10:34,  9.00it/s, loss=0.682, v_num=1, val_loss_epoch=0.245, train_loss=0.676, val_loss_step=0.162]#015Epoch 3:  37%|███▋      | 3420/9137 [06:

[34mloss=0.761, val_loss_step=0.162]#015Epoch 3:  54%|█████▍    | 4950/9137 [09:09<07:45,  9.00it/s, loss=0.546, v_num=1, val_loss_epoch=0.245, train_loss=0.761, val_loss_step=0.162]#015Epoch 3:  54%|█████▍    | 4950/9137 [09:09<07:45,  9.00it/s, loss=0.665, v_num=1, val_loss_epoch=0.245, train_loss=1.350, val_loss_step=0.162]#015Epoch 3:  55%|█████▍    | 4980/9137 [09:13<07:41,  9.00it/s, loss=0.665, v_num=1, val_loss_epoch=0.245, train_loss=1.350, val_loss_step=0.162]#015Epoch 3:  55%|█████▍    | 4980/9137 [09:13<07:41,  9.00it/s, loss=0.681, v_num=1, val_loss_epoch=0.245, train_loss=0.985, val_loss_step=0.162]#015Epoch 3:  55%|█████▍    | 5010/9137 [09:16<07:38,  9.00it/s, loss=0.681, v_num=1, val_loss_epoch=0.245, train_loss=0.985, val_loss_step=0.162]#015Epoch 3:  55%|█████▍    | 5010/9137 [09:16<07:38,  9.00it/s, loss=0.567, v_num=1, val_loss_epoch=0.245, train_loss=0.538, val_loss_step=0.162]#015Epoch 3:  55%|█████▌    | 5040/9137 [09:19<07:35,  9.00it/s, loss=0.567, v_num=1, v

[34mrain_loss=0.512, val_loss_step=0.162]#015Epoch 3:  71%|███████   | 6510/9137 [12:02<04:51,  9.01it/s, loss=0.545, v_num=1, val_loss_epoch=0.245, train_loss=0.734, val_loss_step=0.162]#015Epoch 3:  72%|███████▏  | 6540/9137 [12:06<04:48,  9.01it/s, loss=0.545, v_num=1, val_loss_epoch=0.245, train_loss=0.734, val_loss_step=0.162]#015Epoch 3:  72%|███████▏  | 6540/9137 [12:06<04:48,  9.01it/s, loss=0.671, v_num=1, val_loss_epoch=0.245, train_loss=0.629, val_loss_step=0.162]#015Epoch 3:  72%|███████▏  | 6570/9137 [12:09<04:45,  9.01it/s, loss=0.671, v_num=1, val_loss_epoch=0.245, train_loss=0.629, val_loss_step=0.162]#015Epoch 3:  72%|███████▏  | 6570/9137 [12:09<04:45,  9.01it/s, loss=0.683, v_num=1, val_loss_epoch=0.245, train_loss=0.932, val_loss_step=0.162]#015Epoch 3:  72%|███████▏  | 6600/9137 [12:12<04:41,  9.01it/s, loss=0.683, v_num=1, val_loss_epoch=0.245, train_loss=0.932, val_loss_step=0.162]#015Epoch 3:  72%|███████▏  | 6600/9137 [12:12<04:41,  9.01it/s, loss=0.561, v_num

[34m4:52<02:01,  9.01it/s, loss=0.61, v_num=1, val_loss_epoch=0.245, train_loss=0.481, val_loss_step=0.162] #015Epoch 3:  88%|████████▊ | 8070/9137 [14:56<01:58,  9.01it/s, loss=0.61, v_num=1, val_loss_epoch=0.245, train_loss=0.481, val_loss_step=0.162]#015Epoch 3:  88%|████████▊ | 8070/9137 [14:56<01:58,  9.01it/s, loss=0.732, v_num=1, val_loss_epoch=0.245, train_loss=0.771, val_loss_step=0.162]#015Epoch 3:  89%|████████▊ | 8100/9137 [14:59<01:55,  9.01it/s, loss=0.732, v_num=1, val_loss_epoch=0.245, train_loss=0.771, val_loss_step=0.162]#015Epoch 3:  89%|████████▊ | 8100/9137 [14:59<01:55,  9.01it/s, loss=0.652, v_num=1, val_loss_epoch=0.245, train_loss=0.452, val_loss_step=0.162]#015Epoch 3:  89%|████████▉ | 8130/9137 [15:02<01:51,  9.01it/s, loss=0.652, v_num=1, val_loss_epoch=0.245, train_loss=0.452, val_loss_step=0.162]#015Epoch 3:  89%|████████▉ | 8130/9137 [15:02<01:51,  9.01it/s, loss=0.583, v_num=1, val_loss_epoch=0.245, train_loss=0.416, val_loss_step=0.162]#015Epoch 3:  89

[34m#015Validating:  68%|██████▊   | 330/483 [00:31<00:14, 10.30it/s]#033[A#015Epoch 3:  99%|█████████▊| 9000/9137 [16:32<00:15,  9.06it/s, loss=0.627, v_num=1, val_loss_epoch=0.245, train_loss=0.586, val_loss_step=0.162][0m
[34m#015Validating:  75%|███████▍  | 360/483 [00:34<00:11, 10.42it/s]#033[A#015Epoch 3:  99%|█████████▉| 9030/9137 [16:35<00:11,  9.07it/s, loss=0.627, v_num=1, val_loss_epoch=0.245, train_loss=0.586, val_loss_step=0.162][0m
[34m#015Validating:  81%|████████  | 390/483 [00:37<00:08, 10.44it/s]#033[A#015Epoch 3:  99%|█████████▉| 9060/9137 [16:38<00:08,  9.07it/s, loss=0.627, v_num=1, val_loss_epoch=0.245, train_loss=0.586, val_loss_step=0.162][0m
[34m#015Validating:  87%|████████▋ | 420/483 [00:39<00:05, 10.58it/s]#033[A#015Epoch 3:  99%|█████████▉| 9090/9137 [16:41<00:05,  9.08it/s, loss=0.627, v_num=1, val_loss_epoch=0.245, train_loss=0.586, val_loss_step=0.162][0m
[34m#015Validating:  93%|█████████▎| 450/483 [00:42<00:03, 10.68it/s]#033[A#015Epoch 3: 100

[34mpoch=0.249, train_loss=0.622, val_loss_step=0.146]#015Epoch 4:  18%|█▊        | 1680/9137 [03:06<13:47,  9.01it/s, loss=0.626, v_num=1, val_loss_epoch=0.249, train_loss=0.908, val_loss_step=0.146]#015Epoch 4:  19%|█▊        | 1710/9137 [03:09<13:44,  9.01it/s, loss=0.626, v_num=1, val_loss_epoch=0.249, train_loss=0.908, val_loss_step=0.146]#015Epoch 4:  19%|█▊        | 1710/9137 [03:09<13:44,  9.01it/s, loss=0.54, v_num=1, val_loss_epoch=0.249, train_loss=0.504, val_loss_step=0.146] #015Epoch 4:  19%|█▉        | 1740/9137 [03:13<13:41,  9.01it/s, loss=0.54, v_num=1, val_loss_epoch=0.249, train_loss=0.504, val_loss_step=0.146]#015Epoch 4:  19%|█▉        | 1740/9137 [03:13<13:41,  9.01it/s, loss=0.567, v_num=1, val_loss_epoch=0.249, train_loss=0.730, val_loss_step=0.146]#015Epoch 4:  19%|█▉        | 1770/9137 [03:16<13:37,  9.01it/s, loss=0.567, v_num=1, val_loss_epoch=0.249, train_loss=0.730, val_loss_step=0.146]#015Epoch 4:  19%|█▉        | 1770/9137 [03:16<13:37,  9.01it/s, loss=

[34mloss=0.547, v_num=1, val_loss_epoch=0.249, train_loss=0.364, val_loss_step=0.146]#015Epoch 4:  36%|███▋      | 3330/9137 [06:09<10:44,  9.01it/s, loss=0.52, v_num=1, val_loss_epoch=0.249, train_loss=0.488, val_loss_step=0.146] #015Epoch 4:  37%|███▋      | 3360/9137 [06:12<10:40,  9.01it/s, loss=0.52, v_num=1, val_loss_epoch=0.249, train_loss=0.488, val_loss_step=0.146]#015Epoch 4:  37%|███▋      | 3360/9137 [06:12<10:40,  9.01it/s, loss=0.511, v_num=1, val_loss_epoch=0.249, train_loss=0.617, val_loss_step=0.146]#015Epoch 4:  37%|███▋      | 3390/9137 [06:16<10:37,  9.01it/s, loss=0.511, v_num=1, val_loss_epoch=0.249, train_loss=0.617, val_loss_step=0.146]#015Epoch 4:  37%|███▋      | 3390/9137 [06:16<10:37,  9.01it/s, loss=0.579, v_num=1, val_loss_epoch=0.249, train_loss=1.000, val_loss_step=0.146]#015Epoch 4:  37%|███▋      | 3420/9137 [06:19<10:34,  9.01it/s, loss=0.579, v_num=1, val_loss_epoch=0.249, train_loss=1.000, val_loss_step=0.146]#015Epoch 4:  37%|███▋      | 3420/9137

[34m train_loss=0.475, val_loss_step=0.146]#015Epoch 4:  54%|█████▍    | 4950/9137 [09:09<07:44,  9.01it/s, loss=0.598, v_num=1, val_loss_epoch=0.249, train_loss=0.475, val_loss_step=0.146]#015Epoch 4:  54%|█████▍    | 4950/9137 [09:09<07:44,  9.01it/s, loss=0.538, v_num=1, val_loss_epoch=0.249, train_loss=0.511, val_loss_step=0.146]#015Epoch 4:  55%|█████▍    | 4980/9137 [09:12<07:41,  9.01it/s, loss=0.538, v_num=1, val_loss_epoch=0.249, train_loss=0.511, val_loss_step=0.146]#015Epoch 4:  55%|█████▍    | 4980/9137 [09:12<07:41,  9.01it/s, loss=0.616, v_num=1, val_loss_epoch=0.249, train_loss=0.407, val_loss_step=0.146]#015Epoch 4:  55%|█████▍    | 5010/9137 [09:16<07:38,  9.01it/s, loss=0.616, v_num=1, val_loss_epoch=0.249, train_loss=0.407, val_loss_step=0.146]#015Epoch 4:  55%|█████▍    | 5010/9137 [09:16<07:38,  9.01it/s, loss=0.676, v_num=1, val_loss_epoch=0.249, train_loss=0.466, val_loss_step=0.146]#015Epoch 4:  55%|█████▌    | 5040/9137 [09:19<07:34,  9.01it/s, loss=0.676, v_n

[34mh=0.249, train_loss=0.812, val_loss_step=0.146]#015Epoch 4:  71%|███████   | 6510/9137 [12:02<04:51,  9.01it/s, loss=0.514, v_num=1, val_loss_epoch=0.249, train_loss=0.559, val_loss_step=0.146]#015Epoch 4:  72%|███████▏  | 6540/9137 [12:05<04:48,  9.01it/s, loss=0.514, v_num=1, val_loss_epoch=0.249, train_loss=0.559, val_loss_step=0.146]#015Epoch 4:  72%|███████▏  | 6540/9137 [12:05<04:48,  9.01it/s, loss=0.595, v_num=1, val_loss_epoch=0.249, train_loss=0.734, val_loss_step=0.146]#015Epoch 4:  72%|███████▏  | 6570/9137 [12:09<04:44,  9.01it/s, loss=0.595, v_num=1, val_loss_epoch=0.249, train_loss=0.734, val_loss_step=0.146]#015Epoch 4:  72%|███████▏  | 6570/9137 [12:09<04:44,  9.01it/s, loss=0.659, v_num=1, val_loss_epoch=0.249, train_loss=0.344, val_loss_step=0.146]#015Epoch 4:  72%|███████▏  | 6600/9137 [12:12<04:41,  9.01it/s, loss=0.659, v_num=1, val_loss_epoch=0.249, train_loss=0.344, val_loss_step=0.146]#015Epoch 4:  72%|███████▏  | 6600/9137 [12:12<04:41,  9.01it/s, loss=0.

[34m040/9137 [14:52<02:01,  9.01it/s, loss=0.506, v_num=1, val_loss_epoch=0.249, train_loss=0.404, val_loss_step=0.146]#015Epoch 4:  88%|████████▊ | 8070/9137 [14:55<01:58,  9.01it/s, loss=0.506, v_num=1, val_loss_epoch=0.249, train_loss=0.404, val_loss_step=0.146]#015Epoch 4:  88%|████████▊ | 8070/9137 [14:55<01:58,  9.01it/s, loss=0.549, v_num=1, val_loss_epoch=0.249, train_loss=0.442, val_loss_step=0.146]#015Epoch 4:  89%|████████▊ | 8100/9137 [14:59<01:55,  9.01it/s, loss=0.549, v_num=1, val_loss_epoch=0.249, train_loss=0.442, val_loss_step=0.146]#015Epoch 4:  89%|████████▊ | 8100/9137 [14:59<01:55,  9.01it/s, loss=0.549, v_num=1, val_loss_epoch=0.249, train_loss=0.655, val_loss_step=0.146]#015Epoch 4:  89%|████████▉ | 8130/9137 [15:02<01:51,  9.01it/s, loss=0.549, v_num=1, val_loss_epoch=0.249, train_loss=0.655, val_loss_step=0.146]#015Epoch 4:  89%|████████▉ | 8130/9137 [15:02<01:51,  9.01it/s, loss=0.591, v_num=1, val_loss_epoch=0.249, train_loss=0.548, val_loss_step=0.146]#015

[34m#015Validating:  68%|██████▊   | 330/483 [00:31<00:14, 10.31it/s]#033[A#015Epoch 4:  99%|█████████▊| 9000/9137 [16:32<00:15,  9.07it/s, loss=0.6, v_num=1, val_loss_epoch=0.249, train_loss=0.376, val_loss_step=0.146][0m
[34m#015Validating:  75%|███████▍  | 360/483 [00:34<00:11, 10.41it/s]#033[A#015Epoch 4:  99%|█████████▉| 9030/9137 [16:35<00:11,  9.07it/s, loss=0.6, v_num=1, val_loss_epoch=0.249, train_loss=0.376, val_loss_step=0.146][0m
[34m#015Validating:  81%|████████  | 390/483 [00:37<00:08, 10.42it/s]#033[A#015Epoch 4:  99%|█████████▉| 9060/9137 [16:38<00:08,  9.08it/s, loss=0.6, v_num=1, val_loss_epoch=0.249, train_loss=0.376, val_loss_step=0.146][0m
[34m#015Validating:  87%|████████▋ | 420/483 [00:39<00:05, 10.56it/s]#033[A#015Epoch 4:  99%|█████████▉| 9090/9137 [16:40<00:05,  9.08it/s, loss=0.6, v_num=1, val_loss_epoch=0.249, train_loss=0.376, val_loss_step=0.146][0m
[34m#015Validating:  93%|█████████▎| 450/483 [00:42<00:03, 10.67it/s]#033[A#015Epoch 4: 100%|██████

[34m248, train_loss=0.312, val_loss_step=0.161]#015Epoch 5:  18%|█▊        | 1680/9137 [03:06<13:48,  9.00it/s, loss=0.497, v_num=1, val_loss_epoch=0.248, train_loss=0.560, val_loss_step=0.161]#015Epoch 5:  19%|█▊        | 1710/9137 [03:09<13:45,  9.00it/s, loss=0.497, v_num=1, val_loss_epoch=0.248, train_loss=0.560, val_loss_step=0.161]#015Epoch 5:  19%|█▊        | 1710/9137 [03:09<13:45,  9.00it/s, loss=0.549, v_num=1, val_loss_epoch=0.248, train_loss=0.436, val_loss_step=0.161]#015Epoch 5:  19%|█▉        | 1740/9137 [03:13<13:42,  9.00it/s, loss=0.549, v_num=1, val_loss_epoch=0.248, train_loss=0.436, val_loss_step=0.161]#015Epoch 5:  19%|█▉        | 1740/9137 [03:13<13:42,  9.00it/s, loss=0.537, v_num=1, val_loss_epoch=0.248, train_loss=0.448, val_loss_step=0.161]#015Epoch 5:  19%|█▉        | 1770/9137 [03:16<13:38,  9.00it/s, loss=0.537, v_num=1, val_loss_epoch=0.248, train_loss=0.448, val_loss_step=0.161]#015Epoch 5:  19%|█▉        | 1770/9137 [03:16<13:38,  9.00it/s, loss=0.567,

[34mnum=1, val_loss_epoch=0.248, train_loss=0.420, val_loss_step=0.161]#015Epoch 5:  36%|███▋      | 3330/9137 [06:09<10:44,  9.01it/s, loss=0.525, v_num=1, val_loss_epoch=0.248, train_loss=0.509, val_loss_step=0.161]#015Epoch 5:  37%|███▋      | 3360/9137 [06:12<10:41,  9.01it/s, loss=0.525, v_num=1, val_loss_epoch=0.248, train_loss=0.509, val_loss_step=0.161]#015Epoch 5:  37%|███▋      | 3360/9137 [06:12<10:41,  9.01it/s, loss=0.453, v_num=1, val_loss_epoch=0.248, train_loss=0.275, val_loss_step=0.161]#015Epoch 5:  37%|███▋      | 3390/9137 [06:16<10:37,  9.01it/s, loss=0.453, v_num=1, val_loss_epoch=0.248, train_loss=0.275, val_loss_step=0.161]#015Epoch 5:  37%|███▋      | 3390/9137 [06:16<10:37,  9.01it/s, loss=0.487, v_num=1, val_loss_epoch=0.248, train_loss=0.344, val_loss_step=0.161]#015Epoch 5:  37%|███▋      | 3420/9137 [06:19<10:34,  9.01it/s, loss=0.487, v_num=1, val_loss_epoch=0.248, train_loss=0.344, val_loss_step=0.161]#015Epoch 5:  37%|███▋      | 3420/9137 [06:19<10:34

[34m2, val_loss_step=0.161]#015Epoch 5:  54%|█████▍    | 4950/9137 [09:09<07:44,  9.02it/s, loss=0.506, v_num=1, val_loss_epoch=0.248, train_loss=0.392, val_loss_step=0.161]#015Epoch 5:  54%|█████▍    | 4950/9137 [09:09<07:44,  9.02it/s, loss=0.458, v_num=1, val_loss_epoch=0.248, train_loss=0.307, val_loss_step=0.161]#015Epoch 5:  55%|█████▍    | 4980/9137 [09:12<07:41,  9.01it/s, loss=0.458, v_num=1, val_loss_epoch=0.248, train_loss=0.307, val_loss_step=0.161]#015Epoch 5:  55%|█████▍    | 4980/9137 [09:12<07:41,  9.01it/s, loss=0.55, v_num=1, val_loss_epoch=0.248, train_loss=0.507, val_loss_step=0.161] #015Epoch 5:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.55, v_num=1, val_loss_epoch=0.248, train_loss=0.507, val_loss_step=0.161]#015Epoch 5:  55%|█████▍    | 5010/9137 [09:15<07:37,  9.02it/s, loss=0.524, v_num=1, val_loss_epoch=0.248, train_loss=0.591, val_loss_step=0.161]#015Epoch 5:  55%|█████▌    | 5040/9137 [09:19<07:34,  9.02it/s, loss=0.524, v_num=1, val_loss_ep

[34mss=0.680, val_loss_step=0.161]#015Epoch 5:  71%|███████   | 6510/9137 [12:01<04:51,  9.02it/s, loss=0.557, v_num=1, val_loss_epoch=0.248, train_loss=0.544, val_loss_step=0.161]#015Epoch 5:  72%|███████▏  | 6540/9137 [12:05<04:47,  9.02it/s, loss=0.557, v_num=1, val_loss_epoch=0.248, train_loss=0.544, val_loss_step=0.161]#015Epoch 5:  72%|███████▏  | 6540/9137 [12:05<04:47,  9.02it/s, loss=0.449, v_num=1, val_loss_epoch=0.248, train_loss=0.471, val_loss_step=0.161]#015Epoch 5:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.02it/s, loss=0.449, v_num=1, val_loss_epoch=0.248, train_loss=0.471, val_loss_step=0.161]#015Epoch 5:  72%|███████▏  | 6570/9137 [12:08<04:44,  9.02it/s, loss=0.519, v_num=1, val_loss_epoch=0.248, train_loss=0.459, val_loss_step=0.161]#015Epoch 5:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.02it/s, loss=0.519, v_num=1, val_loss_epoch=0.248, train_loss=0.459, val_loss_step=0.161]#015Epoch 5:  72%|███████▏  | 6600/9137 [12:11<04:41,  9.02it/s, loss=0.497, v_num=1, val

[34m:01,  9.02it/s, loss=0.51, v_num=1, val_loss_epoch=0.248, train_loss=0.736, val_loss_step=0.161] #015Epoch 5:  88%|████████▊ | 8070/9137 [14:55<01:58,  9.02it/s, loss=0.51, v_num=1, val_loss_epoch=0.248, train_loss=0.736, val_loss_step=0.161]#015Epoch 5:  88%|████████▊ | 8070/9137 [14:55<01:58,  9.02it/s, loss=0.593, v_num=1, val_loss_epoch=0.248, train_loss=0.609, val_loss_step=0.161]#015Epoch 5:  89%|████████▊ | 8100/9137 [14:58<01:55,  9.02it/s, loss=0.593, v_num=1, val_loss_epoch=0.248, train_loss=0.609, val_loss_step=0.161]#015Epoch 5:  89%|████████▊ | 8100/9137 [14:58<01:55,  9.02it/s, loss=0.46, v_num=1, val_loss_epoch=0.248, train_loss=0.454, val_loss_step=0.161] #015Epoch 5:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.46, v_num=1, val_loss_epoch=0.248, train_loss=0.454, val_loss_step=0.161]#015Epoch 5:  89%|████████▉ | 8130/9137 [15:01<01:51,  9.02it/s, loss=0.5, v_num=1, val_loss_epoch=0.248, train_loss=0.313, val_loss_step=0.161] #015Epoch 5:  89%|███████

[34m#015Validating:  68%|██████▊   | 330/483 [00:31<00:15, 10.17it/s]#033[A#015Epoch 5:  99%|█████████▊| 9000/9137 [16:32<00:15,  9.07it/s, loss=0.556, v_num=1, val_loss_epoch=0.248, train_loss=0.442, val_loss_step=0.161][0m
[34m#015Validating:  75%|███████▍  | 360/483 [00:34<00:11, 10.28it/s]#033[A#015Epoch 5:  99%|█████████▉| 9030/9137 [16:35<00:11,  9.07it/s, loss=0.556, v_num=1, val_loss_epoch=0.248, train_loss=0.442, val_loss_step=0.161][0m
[34m#015Validating:  81%|████████  | 390/483 [00:37<00:09, 10.30it/s]#033[A#015Epoch 5:  99%|█████████▉| 9060/9137 [16:38<00:08,  9.08it/s, loss=0.556, v_num=1, val_loss_epoch=0.248, train_loss=0.442, val_loss_step=0.161][0m
[34m#015Validating:  87%|████████▋ | 420/483 [00:40<00:06, 10.43it/s]#033[A#015Epoch 5:  99%|█████████▉| 9090/9137 [16:40<00:05,  9.08it/s, loss=0.556, v_num=1, val_loss_epoch=0.248, train_loss=0.442, val_loss_step=0.161][0m
[34m#015Validating:  93%|█████████▎| 450/483 [00:43<00:03, 10.55it/s]#033[A#015Epoch 5: 100

[34mrain_loss=1.150]#015Epoch 0:  33%|███▎      | 1950/5888 [03:36<07:16,  9.01it/s, loss=1.19, v_num=0, val_loss_epoch=0.672, train_loss=1.150]#015Epoch 0:  33%|███▎      | 1950/5888 [03:36<07:16,  9.01it/s, loss=1.28, v_num=0, val_loss_epoch=0.672, train_loss=0.981]#015Epoch 0:  34%|███▎      | 1980/5888 [03:39<07:13,  9.02it/s, loss=1.28, v_num=0, val_loss_epoch=0.672, train_loss=0.981]#015Epoch 0:  34%|███▎      | 1980/5888 [03:39<07:13,  9.02it/s, loss=1.14, v_num=0, val_loss_epoch=0.672, train_loss=1.670]#015Epoch 0:  34%|███▍      | 2010/5888 [03:42<07:10,  9.01it/s, loss=1.14, v_num=0, val_loss_epoch=0.672, train_loss=1.670]#015Epoch 0:  34%|███▍      | 2010/5888 [03:42<07:10,  9.01it/s, loss=1.21, v_num=0, val_loss_epoch=0.672, train_loss=0.955]#015Epoch 0:  35%|███▍      | 2040/5888 [03:46<07:06,  9.02it/s, loss=1.21, v_num=0, val_loss_epoch=0.672, train_loss=0.955]#015Epoch 0:  35%|███▍      | 2040/5888 [03:46<07:06,  9.02it/s, loss=1.05, v_num=0, val_loss_epoch=0.672, trai

[34m<03:50,  9.02it/s, loss=0.997, v_num=0, val_loss_epoch=0.672, train_loss=0.880]#015Epoch 0:  65%|██████▍   | 3810/5888 [07:02<03:50,  9.02it/s, loss=1.09, v_num=0, val_loss_epoch=0.672, train_loss=1.200] #015Epoch 0:  65%|██████▌   | 3840/5888 [07:05<03:46,  9.03it/s, loss=1.09, v_num=0, val_loss_epoch=0.672, train_loss=1.200]#015Epoch 0:  65%|██████▌   | 3840/5888 [07:05<03:46,  9.03it/s, loss=0.968, v_num=0, val_loss_epoch=0.672, train_loss=2.060]#015Epoch 0:  66%|██████▌   | 3870/5888 [07:08<03:43,  9.03it/s, loss=0.968, v_num=0, val_loss_epoch=0.672, train_loss=2.060]#015Epoch 0:  66%|██████▌   | 3870/5888 [07:08<03:43,  9.03it/s, loss=1.12, v_num=0, val_loss_epoch=0.672, train_loss=0.967] #015Epoch 0:  66%|██████▌   | 3900/5888 [07:12<03:40,  9.02it/s, loss=1.12, v_num=0, val_loss_epoch=0.672, train_loss=0.967]#015Epoch 0:  66%|██████▌   | 3900/5888 [07:12<03:40,  9.02it/s, loss=1.13, v_num=0, val_loss_epoch=0.672, train_loss=0.907]#015Epoch 0:  67%|██████▋   | 3930/5888 [07:

[34m#015Validating:  10%|▉         | 30/313 [00:02<00:26, 10.69it/s]#033[A#015Epoch 0:  95%|█████████▌| 5610/5888 [10:20<00:30,  9.03it/s, loss=1.09, v_num=0, val_loss_epoch=0.672, train_loss=1.330][0m
[34m#015Validating:  19%|█▉        | 60/313 [00:05<00:23, 10.63it/s]#033[A#015Epoch 0:  96%|█████████▌| 5640/5888 [10:23<00:27,  9.04it/s, loss=1.09, v_num=0, val_loss_epoch=0.672, train_loss=1.330][0m
[34m#015Validating:  29%|██▉       | 90/313 [00:08<00:20, 10.70it/s]#033[A#015Epoch 0:  96%|█████████▋| 5670/5888 [10:26<00:24,  9.05it/s, loss=1.09, v_num=0, val_loss_epoch=0.672, train_loss=1.330][0m
[34m#015Validating:  38%|███▊      | 120/313 [00:11<00:18, 10.69it/s]#033[A#015Epoch 0:  97%|█████████▋| 5700/5888 [10:29<00:20,  9.06it/s, loss=1.09, v_num=0, val_loss_epoch=0.672, train_loss=1.330][0m
[34m#015Validating:  48%|████▊     | 150/313 [00:14<00:15, 10.59it/s]#033[A#015Epoch 0:  97%|█████████▋| 5730/5888 [10:32<00:17,  9.06it/s, loss=1.09, v_num=0, val_loss_epoch=0.672, 

[34m03<07:50,  9.01it/s, loss=0.938, v_num=0, val_loss_epoch=0.203, train_loss=1.120, val_loss_step=0.0731]#015Epoch 1:  29%|██▊       | 1680/5888 [03:06<07:47,  9.01it/s, loss=0.938, v_num=0, val_loss_epoch=0.203, train_loss=1.120, val_loss_step=0.0731]#015Epoch 1:  29%|██▊       | 1680/5888 [03:06<07:47,  9.01it/s, loss=0.845, v_num=0, val_loss_epoch=0.203, train_loss=1.020, val_loss_step=0.0731]#015Epoch 1:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.845, v_num=0, val_loss_epoch=0.203, train_loss=1.020, val_loss_step=0.0731]#015Epoch 1:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.708, v_num=0, val_loss_epoch=0.203, train_loss=0.580, val_loss_step=0.0731]#015Epoch 1:  30%|██▉       | 1740/5888 [03:13<07:40,  9.01it/s, loss=0.708, v_num=0, val_loss_epoch=0.203, train_loss=0.580, val_loss_step=0.0731]#015Epoch 1:  30%|██▉       | 1740/5888 [03:13<07:40,  9.01it/s, loss=0.832, v_num=0, val_loss_epoch=0.203, train_loss=0.738, val_loss_step=0.0731]#015Epoch 

[34m0,  9.02it/s, loss=0.903, v_num=0, val_loss_epoch=0.203, train_loss=0.553, val_loss_step=0.0731]#015Epoch 1:  56%|█████▌    | 3270/5888 [06:02<04:50,  9.02it/s, loss=0.836, v_num=0, val_loss_epoch=0.203, train_loss=1.050, val_loss_step=0.0731]#015Epoch 1:  56%|█████▌    | 3300/5888 [06:06<04:47,  9.01it/s, loss=0.836, v_num=0, val_loss_epoch=0.203, train_loss=1.050, val_loss_step=0.0731]#015Epoch 1:  56%|█████▌    | 3300/5888 [06:06<04:47,  9.01it/s, loss=0.953, v_num=0, val_loss_epoch=0.203, train_loss=0.668, val_loss_step=0.0731]#015Epoch 1:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.01it/s, loss=0.953, v_num=0, val_loss_epoch=0.203, train_loss=0.668, val_loss_step=0.0731]#015Epoch 1:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.01it/s, loss=0.856, v_num=0, val_loss_epoch=0.203, train_loss=0.638, val_loss_step=0.0731]#015Epoch 1:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.01it/s, loss=0.856, v_num=0, val_loss_epoch=0.203, train_loss=0.638, val_loss_step=0.0731]#015Epoch 1:  57%

[34ml_loss_step=0.0731]#015Epoch 1:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.01it/s, loss=0.777, v_num=0, val_loss_epoch=0.203, train_loss=0.726, val_loss_step=0.0731]#015Epoch 1:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.01it/s, loss=0.844, v_num=0, val_loss_epoch=0.203, train_loss=0.619, val_loss_step=0.0731]#015Epoch 1:  83%|████████▎ | 4860/5888 [08:59<01:54,  9.01it/s, loss=0.844, v_num=0, val_loss_epoch=0.203, train_loss=0.619, val_loss_step=0.0731]#015Epoch 1:  83%|████████▎ | 4860/5888 [08:59<01:54,  9.01it/s, loss=0.835, v_num=0, val_loss_epoch=0.203, train_loss=0.960, val_loss_step=0.0731]#015Epoch 1:  83%|████████▎ | 4890/5888 [09:02<01:50,  9.01it/s, loss=0.835, v_num=0, val_loss_epoch=0.203, train_loss=0.960, val_loss_step=0.0731]#015Epoch 1:  83%|████████▎ | 4890/5888 [09:02<01:50,  9.01it/s, loss=0.853, v_num=0, val_loss_epoch=0.203, train_loss=0.969, val_loss_step=0.0731]#015Epoch 1:  84%|████████▎ | 4920/5888 [09:05<01:47,  9.01it/s, loss=0.853, v_num=0, val_loss

[34m#015Validating:  48%|████▊     | 150/313 [00:14<00:15, 10.65it/s]#033[A#015Epoch 1:  97%|█████████▋| 5730/5888 [10:32<00:17,  9.06it/s, loss=0.838, v_num=0, val_loss_epoch=0.203, train_loss=0.657, val_loss_step=0.0731][0m
[34m#015Validating:  58%|█████▊    | 180/313 [00:17<00:12, 10.51it/s]#033[A#015Epoch 1:  98%|█████████▊| 5760/5888 [10:35<00:14,  9.06it/s, loss=0.838, v_num=0, val_loss_epoch=0.203, train_loss=0.657, val_loss_step=0.0731][0m
[34m#015Validating:  67%|██████▋   | 210/313 [00:19<00:09, 10.38it/s]#033[A#015Epoch 1:  98%|█████████▊| 5790/5888 [10:38<00:10,  9.07it/s, loss=0.838, v_num=0, val_loss_epoch=0.203, train_loss=0.657, val_loss_step=0.0731][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:22<00:07, 10.41it/s]#033[A#015Epoch 1:  99%|█████████▉| 5820/5888 [10:41<00:07,  9.07it/s, loss=0.838, v_num=0, val_loss_epoch=0.203, train_loss=0.657, val_loss_step=0.0731][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.52it/s]#033[A#015Epoch 1:

[34m      | 1650/5888 [03:02<07:49,  9.03it/s, loss=0.694, v_num=0, val_loss_epoch=0.186, train_loss=0.361, val_loss_step=0.0689]#015Epoch 2:  29%|██▊       | 1680/5888 [03:05<07:45,  9.04it/s, loss=0.694, v_num=0, val_loss_epoch=0.186, train_loss=0.361, val_loss_step=0.0689]#015Epoch 2:  29%|██▊       | 1680/5888 [03:05<07:45,  9.04it/s, loss=0.693, v_num=0, val_loss_epoch=0.186, train_loss=0.728, val_loss_step=0.0689]#015Epoch 2:  29%|██▉       | 1710/5888 [03:09<07:42,  9.04it/s, loss=0.693, v_num=0, val_loss_epoch=0.186, train_loss=0.728, val_loss_step=0.0689]#015Epoch 2:  29%|██▉       | 1710/5888 [03:09<07:42,  9.04it/s, loss=0.78, v_num=0, val_loss_epoch=0.186, train_loss=0.703, val_loss_step=0.0689] #015Epoch 2:  30%|██▉       | 1740/5888 [03:12<07:38,  9.04it/s, loss=0.78, v_num=0, val_loss_epoch=0.186, train_loss=0.703, val_loss_step=0.0689]#015Epoch 2:  30%|██▉       | 1740/5888 [03:12<07:38,  9.04it/s, loss=0.704, v_num=0, val_loss_epoch=0.186, train_loss=0.684, val_loss_s

[34m | 3270/5888 [06:01<04:49,  9.04it/s, loss=0.91, v_num=0, val_loss_epoch=0.186, train_loss=1.410, val_loss_step=0.0689]#015Epoch 2:  56%|█████▌    | 3270/5888 [06:01<04:49,  9.04it/s, loss=0.732, v_num=0, val_loss_epoch=0.186, train_loss=0.468, val_loss_step=0.0689]#015Epoch 2:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.04it/s, loss=0.732, v_num=0, val_loss_epoch=0.186, train_loss=0.468, val_loss_step=0.0689]#015Epoch 2:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.04it/s, loss=0.634, v_num=0, val_loss_epoch=0.186, train_loss=0.491, val_loss_step=0.0689]#015Epoch 2:  57%|█████▋    | 3330/5888 [06:08<04:43,  9.04it/s, loss=0.634, v_num=0, val_loss_epoch=0.186, train_loss=0.491, val_loss_step=0.0689]#015Epoch 2:  57%|█████▋    | 3330/5888 [06:08<04:43,  9.04it/s, loss=0.784, v_num=0, val_loss_epoch=0.186, train_loss=1.560, val_loss_step=0.0689]#015Epoch 2:  57%|█████▋    | 3360/5888 [06:11<04:39,  9.04it/s, loss=0.784, v_num=0, val_loss_epoch=0.186, train_loss=1.560, val_loss_step=0

[34m0.186, train_loss=0.669, val_loss_step=0.0689] #015Epoch 2:  82%|████████▏ | 4830/5888 [08:54<01:57,  9.03it/s, loss=0.88, v_num=0, val_loss_epoch=0.186, train_loss=0.669, val_loss_step=0.0689]#015Epoch 2:  82%|████████▏ | 4830/5888 [08:54<01:57,  9.03it/s, loss=0.626, v_num=0, val_loss_epoch=0.186, train_loss=0.742, val_loss_step=0.0689]#015Epoch 2:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.03it/s, loss=0.626, v_num=0, val_loss_epoch=0.186, train_loss=0.742, val_loss_step=0.0689]#015Epoch 2:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.03it/s, loss=0.646, v_num=0, val_loss_epoch=0.186, train_loss=0.457, val_loss_step=0.0689]#015Epoch 2:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.03it/s, loss=0.646, v_num=0, val_loss_epoch=0.186, train_loss=0.457, val_loss_step=0.0689]#015Epoch 2:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.03it/s, loss=0.699, v_num=0, val_loss_epoch=0.186, train_loss=0.965, val_loss_step=0.0689]#015Epoch 2:  84%|████████▎ | 4920/5888 [09:04<01:47,  9.03it/s, lo

[34m#015Validating:  67%|██████▋   | 210/313 [00:19<00:09, 10.39it/s]#033[A#015Epoch 2:  98%|█████████▊| 5790/5888 [10:37<00:10,  9.08it/s, loss=0.67, v_num=0, val_loss_epoch=0.186, train_loss=0.566, val_loss_step=0.0689][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:22<00:07, 10.41it/s]#033[A#015Epoch 2:  99%|█████████▉| 5820/5888 [10:40<00:07,  9.09it/s, loss=0.67, v_num=0, val_loss_epoch=0.186, train_loss=0.566, val_loss_step=0.0689][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.52it/s]#033[A#015Epoch 2:  99%|█████████▉| 5850/5888 [10:43<00:04,  9.10it/s, loss=0.67, v_num=0, val_loss_epoch=0.186, train_loss=0.566, val_loss_step=0.0689][0m
[34m#015Validating:  96%|█████████▌| 300/313 [00:28<00:01, 10.62it/s]#033[A#015Epoch 2: 100%|█████████▉| 5880/5888 [10:45<00:00,  9.10it/s, loss=0.67, v_num=0, val_loss_epoch=0.186, train_loss=0.566, val_loss_step=0.0689][0m
[34m#015Validating: 100%|██████████| 313/313 [00:29<00:00, 10.75it/s]#033[A#015Epoch 2: 100

[34m | 1650/5888 [03:03<07:50,  9.01it/s, loss=0.622, v_num=0, val_loss_epoch=0.179, train_loss=0.303, val_loss_step=0.0614]#015Epoch 3:  29%|██▊       | 1680/5888 [03:06<07:47,  9.00it/s, loss=0.622, v_num=0, val_loss_epoch=0.179, train_loss=0.303, val_loss_step=0.0614]#015Epoch 3:  29%|██▊       | 1680/5888 [03:06<07:47,  9.00it/s, loss=0.54, v_num=0, val_loss_epoch=0.179, train_loss=0.261, val_loss_step=0.0614] #015Epoch 3:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.54, v_num=0, val_loss_epoch=0.179, train_loss=0.261, val_loss_step=0.0614]#015Epoch 3:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.626, v_num=0, val_loss_epoch=0.179, train_loss=0.442, val_loss_step=0.0614]#015Epoch 3:  30%|██▉       | 1740/5888 [03:13<07:40,  9.01it/s, loss=0.626, v_num=0, val_loss_epoch=0.179, train_loss=0.442, val_loss_step=0.0614]#015Epoch 3:  30%|██▉       | 1740/5888 [03:13<07:40,  9.01it/s, loss=0.672, v_num=0, val_loss_epoch=0.179, train_loss=1.010, val_loss_step=0

[34m270/5888 [06:02<04:50,  9.02it/s, loss=0.703, v_num=0, val_loss_epoch=0.179, train_loss=0.538, val_loss_step=0.0614]#015Epoch 3:  56%|█████▌    | 3270/5888 [06:02<04:50,  9.02it/s, loss=0.642, v_num=0, val_loss_epoch=0.179, train_loss=0.654, val_loss_step=0.0614]#015Epoch 3:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.642, v_num=0, val_loss_epoch=0.179, train_loss=0.654, val_loss_step=0.0614]#015Epoch 3:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.692, v_num=0, val_loss_epoch=0.179, train_loss=1.080, val_loss_step=0.0614]#015Epoch 3:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.692, v_num=0, val_loss_epoch=0.179, train_loss=1.080, val_loss_step=0.0614]#015Epoch 3:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.692, v_num=0, val_loss_epoch=0.179, train_loss=0.622, val_loss_step=0.0614]#015Epoch 3:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.02it/s, loss=0.692, v_num=0, val_loss_epoch=0.179, train_loss=0.622, val_loss_step=0.06

[34m.179, train_loss=0.441, val_loss_step=0.0614]#015Epoch 3:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.532, v_num=0, val_loss_epoch=0.179, train_loss=0.441, val_loss_step=0.0614]#015Epoch 3:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.591, v_num=0, val_loss_epoch=0.179, train_loss=0.402, val_loss_step=0.0614]#015Epoch 3:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.591, v_num=0, val_loss_epoch=0.179, train_loss=0.402, val_loss_step=0.0614]#015Epoch 3:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.672, v_num=0, val_loss_epoch=0.179, train_loss=0.912, val_loss_step=0.0614]#015Epoch 3:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.02it/s, loss=0.672, v_num=0, val_loss_epoch=0.179, train_loss=0.912, val_loss_step=0.0614]#015Epoch 3:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.02it/s, loss=0.714, v_num=0, val_loss_epoch=0.179, train_loss=0.401, val_loss_step=0.0614]#015Epoch 3:  84%|████████▎ | 4920/5888 [09:05<01:47,  9.03it/s, los

[34m#015Validating:  67%|██████▋   | 210/313 [00:20<00:09, 10.35it/s]#033[A#015Epoch 3:  98%|█████████▊| 5790/5888 [10:37<00:10,  9.08it/s, loss=0.726, v_num=0, val_loss_epoch=0.179, train_loss=1.280, val_loss_step=0.0614][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:22<00:07, 10.39it/s]#033[A#015Epoch 3:  99%|█████████▉| 5820/5888 [10:40<00:07,  9.08it/s, loss=0.726, v_num=0, val_loss_epoch=0.179, train_loss=1.280, val_loss_step=0.0614][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.49it/s]#033[A#015Epoch 3:  99%|█████████▉| 5850/5888 [10:43<00:04,  9.09it/s, loss=0.726, v_num=0, val_loss_epoch=0.179, train_loss=1.280, val_loss_step=0.0614][0m
[34m#015Validating:  96%|█████████▌| 300/313 [00:28<00:01, 10.56it/s]#033[A#015Epoch 3: 100%|█████████▉| 5880/5888 [10:46<00:00,  9.10it/s, loss=0.726, v_num=0, val_loss_epoch=0.179, train_loss=1.280, val_loss_step=0.0614][0m
[34m#015Validating: 100%|██████████| 313/313 [00:29<00:00, 10.60it/s]#033[A#015Epoch 3:

[34m   | 1650/5888 [03:03<07:50,  9.01it/s, loss=0.592, v_num=0, val_loss_epoch=0.180, train_loss=0.744, val_loss_step=0.0462]#015Epoch 4:  29%|██▊       | 1680/5888 [03:06<07:46,  9.02it/s, loss=0.592, v_num=0, val_loss_epoch=0.180, train_loss=0.744, val_loss_step=0.0462]#015Epoch 4:  29%|██▊       | 1680/5888 [03:06<07:46,  9.02it/s, loss=0.528, v_num=0, val_loss_epoch=0.180, train_loss=0.568, val_loss_step=0.0462]#015Epoch 4:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.528, v_num=0, val_loss_epoch=0.180, train_loss=0.568, val_loss_step=0.0462]#015Epoch 4:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.553, v_num=0, val_loss_epoch=0.180, train_loss=0.410, val_loss_step=0.0462]#015Epoch 4:  30%|██▉       | 1740/5888 [03:12<07:40,  9.02it/s, loss=0.553, v_num=0, val_loss_epoch=0.180, train_loss=0.410, val_loss_step=0.0462]#015Epoch 4:  30%|██▉       | 1740/5888 [03:12<07:40,  9.02it/s, loss=0.573, v_num=0, val_loss_epoch=0.180, train_loss=0.311, val_loss_ste

[34m  | 3270/5888 [06:02<04:50,  9.02it/s, loss=0.493, v_num=0, val_loss_epoch=0.180, train_loss=1.000, val_loss_step=0.0462]#015Epoch 4:  56%|█████▌    | 3270/5888 [06:02<04:50,  9.02it/s, loss=0.456, v_num=0, val_loss_epoch=0.180, train_loss=0.236, val_loss_step=0.0462]#015Epoch 4:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.456, v_num=0, val_loss_epoch=0.180, train_loss=0.236, val_loss_step=0.0462]#015Epoch 4:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.578, v_num=0, val_loss_epoch=0.180, train_loss=0.261, val_loss_step=0.0462]#015Epoch 4:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.578, v_num=0, val_loss_epoch=0.180, train_loss=0.261, val_loss_step=0.0462]#015Epoch 4:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.631, v_num=0, val_loss_epoch=0.180, train_loss=0.628, val_loss_step=0.0462]#015Epoch 4:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.02it/s, loss=0.631, v_num=0, val_loss_epoch=0.180, train_loss=0.628, val_loss_step

[34mch=0.180, train_loss=0.433, val_loss_step=0.0462]#015Epoch 4:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.549, v_num=0, val_loss_epoch=0.180, train_loss=0.433, val_loss_step=0.0462]#015Epoch 4:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.571, v_num=0, val_loss_epoch=0.180, train_loss=0.921, val_loss_step=0.0462]#015Epoch 4:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.571, v_num=0, val_loss_epoch=0.180, train_loss=0.921, val_loss_step=0.0462]#015Epoch 4:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.582, v_num=0, val_loss_epoch=0.180, train_loss=0.348, val_loss_step=0.0462]#015Epoch 4:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.02it/s, loss=0.582, v_num=0, val_loss_epoch=0.180, train_loss=0.348, val_loss_step=0.0462]#015Epoch 4:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.02it/s, loss=0.63, v_num=0, val_loss_epoch=0.180, train_loss=0.784, val_loss_step=0.0462] #015Epoch 4:  84%|████████▎ | 4920/5888 [09:05<01:47,  9.02it/s,

[34m#015Validating:  67%|██████▋   | 210/313 [00:19<00:09, 10.41it/s]#033[A#015Epoch 4:  98%|█████████▊| 5790/5888 [10:38<00:10,  9.08it/s, loss=0.532, v_num=0, val_loss_epoch=0.180, train_loss=0.457, val_loss_step=0.0462][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:22<00:06, 10.44it/s]#033[A#015Epoch 4:  99%|█████████▉| 5820/5888 [10:40<00:07,  9.08it/s, loss=0.532, v_num=0, val_loss_epoch=0.180, train_loss=0.457, val_loss_step=0.0462][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.54it/s]#033[A#015Epoch 4:  99%|█████████▉| 5850/5888 [10:43<00:04,  9.09it/s, loss=0.532, v_num=0, val_loss_epoch=0.180, train_loss=0.457, val_loss_step=0.0462][0m
[34m#015Validating:  96%|█████████▌| 300/313 [00:28<00:01, 10.60it/s]#033[A#015Epoch 4: 100%|█████████▉| 5880/5888 [10:46<00:00,  9.10it/s, loss=0.532, v_num=0, val_loss_epoch=0.180, train_loss=0.457, val_loss_step=0.0462][0m
[34m#015Validating: 100%|██████████| 313/313 [00:29<00:00, 10.71it/s]#033[A#015Epoch 4:

[34m   | 1650/5888 [03:03<07:50,  9.01it/s, loss=0.46, v_num=0, val_loss_epoch=0.185, train_loss=0.481, val_loss_step=0.0341] #015Epoch 5:  29%|██▊       | 1680/5888 [03:06<07:46,  9.01it/s, loss=0.46, v_num=0, val_loss_epoch=0.185, train_loss=0.481, val_loss_step=0.0341]#015Epoch 5:  29%|██▊       | 1680/5888 [03:06<07:46,  9.01it/s, loss=0.528, v_num=0, val_loss_epoch=0.185, train_loss=0.615, val_loss_step=0.0341]#015Epoch 5:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.528, v_num=0, val_loss_epoch=0.185, train_loss=0.615, val_loss_step=0.0341]#015Epoch 5:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.445, v_num=0, val_loss_epoch=0.185, train_loss=1.050, val_loss_step=0.0341]#015Epoch 5:  30%|██▉       | 1740/5888 [03:13<07:40,  9.01it/s, loss=0.445, v_num=0, val_loss_epoch=0.185, train_loss=1.050, val_loss_step=0.0341]#015Epoch 5:  30%|██▉       | 1740/5888 [03:13<07:40,  9.01it/s, loss=0.517, v_num=0, val_loss_epoch=0.185, train_loss=0.460, val_loss_step

[34m70/5888 [06:02<04:50,  9.02it/s, loss=0.575, v_num=0, val_loss_epoch=0.185, train_loss=0.484, val_loss_step=0.0341]#015Epoch 5:  56%|█████▌    | 3270/5888 [06:02<04:50,  9.02it/s, loss=0.429, v_num=0, val_loss_epoch=0.185, train_loss=0.353, val_loss_step=0.0341]#015Epoch 5:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.429, v_num=0, val_loss_epoch=0.185, train_loss=0.353, val_loss_step=0.0341]#015Epoch 5:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.521, v_num=0, val_loss_epoch=0.185, train_loss=0.776, val_loss_step=0.0341]#015Epoch 5:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.521, v_num=0, val_loss_epoch=0.185, train_loss=0.776, val_loss_step=0.0341]#015Epoch 5:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.437, v_num=0, val_loss_epoch=0.185, train_loss=0.378, val_loss_step=0.0341]#015Epoch 5:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.02it/s, loss=0.437, v_num=0, val_loss_epoch=0.185, train_loss=0.378, val_loss_step=0.034

[34mtrain_loss=0.399, val_loss_step=0.0341]#015Epoch 5:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.01it/s, loss=0.395, v_num=0, val_loss_epoch=0.185, train_loss=0.399, val_loss_step=0.0341]#015Epoch 5:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.01it/s, loss=0.436, v_num=0, val_loss_epoch=0.185, train_loss=0.377, val_loss_step=0.0341]#015Epoch 5:  83%|████████▎ | 4860/5888 [08:59<01:54,  9.01it/s, loss=0.436, v_num=0, val_loss_epoch=0.185, train_loss=0.377, val_loss_step=0.0341]#015Epoch 5:  83%|████████▎ | 4860/5888 [08:59<01:54,  9.01it/s, loss=0.446, v_num=0, val_loss_epoch=0.185, train_loss=0.503, val_loss_step=0.0341]#015Epoch 5:  83%|████████▎ | 4890/5888 [09:02<01:50,  9.01it/s, loss=0.446, v_num=0, val_loss_epoch=0.185, train_loss=0.503, val_loss_step=0.0341]#015Epoch 5:  83%|████████▎ | 4890/5888 [09:02<01:50,  9.01it/s, loss=0.518, v_num=0, val_loss_epoch=0.185, train_loss=0.492, val_loss_step=0.0341]#015Epoch 5:  84%|████████▎ | 4920/5888 [09:05<01:47,  9.01it/s, loss=0.51

[34m#015Validating:  48%|████▊     | 150/313 [00:14<00:15, 10.67it/s]#033[A#015Epoch 5:  97%|█████████▋| 5730/5888 [10:32<00:17,  9.05it/s, loss=0.399, v_num=0, val_loss_epoch=0.185, train_loss=0.234, val_loss_step=0.0341][0m
[34m#015Validating:  58%|█████▊    | 180/313 [00:16<00:12, 10.53it/s]#033[A#015Epoch 5:  98%|█████████▊| 5760/5888 [10:35<00:14,  9.06it/s, loss=0.399, v_num=0, val_loss_epoch=0.185, train_loss=0.234, val_loss_step=0.0341][0m
[34m#015Validating:  67%|██████▋   | 210/313 [00:19<00:09, 10.38it/s]#033[A#015Epoch 5:  98%|█████████▊| 5790/5888 [10:38<00:10,  9.06it/s, loss=0.399, v_num=0, val_loss_epoch=0.185, train_loss=0.234, val_loss_step=0.0341][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:22<00:07, 10.41it/s]#033[A#015Epoch 5:  99%|█████████▉| 5820/5888 [10:41<00:07,  9.07it/s, loss=0.399, v_num=0, val_loss_epoch=0.185, train_loss=0.234, val_loss_step=0.0341][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.50it/s]#033[A#015Epoch 5:

[34m20, train_loss=0.796]#015Epoch 0:  33%|███▎      | 1950/5888 [03:36<07:16,  9.02it/s, loss=1.07, v_num=1, val_loss_epoch=0.720, train_loss=0.796]#015Epoch 0:  33%|███▎      | 1950/5888 [03:36<07:16,  9.02it/s, loss=1.06, v_num=1, val_loss_epoch=0.720, train_loss=1.440]#015Epoch 0:  34%|███▎      | 1980/5888 [03:39<07:13,  9.02it/s, loss=1.06, v_num=1, val_loss_epoch=0.720, train_loss=1.440]#015Epoch 0:  34%|███▎      | 1980/5888 [03:39<07:13,  9.02it/s, loss=0.989, v_num=1, val_loss_epoch=0.720, train_loss=1.330]#015Epoch 0:  34%|███▍      | 2010/5888 [03:42<07:10,  9.02it/s, loss=0.989, v_num=1, val_loss_epoch=0.720, train_loss=1.330]#015Epoch 0:  34%|███▍      | 2010/5888 [03:42<07:10,  9.02it/s, loss=1.05, v_num=1, val_loss_epoch=0.720, train_loss=0.869] #015Epoch 0:  35%|███▍      | 2040/5888 [03:46<07:06,  9.02it/s, loss=1.05, v_num=1, val_loss_epoch=0.720, train_loss=0.869]#015Epoch 0:  35%|███▍      | 2040/5888 [03:46<07:06,  9.02it/s, loss=0.976, v_num=1, val_loss_epoch=0.

[34m0:  65%|██████▍   | 3810/5888 [07:02<03:50,  9.01it/s, loss=1.09, v_num=1, val_loss_epoch=0.720, train_loss=1.690]#015Epoch 0:  65%|██████▍   | 3810/5888 [07:02<03:50,  9.01it/s, loss=0.886, v_num=1, val_loss_epoch=0.720, train_loss=0.886]#015Epoch 0:  65%|██████▌   | 3840/5888 [07:06<03:47,  9.01it/s, loss=0.886, v_num=1, val_loss_epoch=0.720, train_loss=0.886]#015Epoch 0:  65%|██████▌   | 3840/5888 [07:06<03:47,  9.01it/s, loss=0.895, v_num=1, val_loss_epoch=0.720, train_loss=1.850]#015Epoch 0:  66%|██████▌   | 3870/5888 [07:09<03:43,  9.01it/s, loss=0.895, v_num=1, val_loss_epoch=0.720, train_loss=1.850]#015Epoch 0:  66%|██████▌   | 3870/5888 [07:09<03:43,  9.01it/s, loss=0.899, v_num=1, val_loss_epoch=0.720, train_loss=0.685]#015Epoch 0:  66%|██████▌   | 3900/5888 [07:12<03:40,  9.01it/s, loss=0.899, v_num=1, val_loss_epoch=0.720, train_loss=0.685]#015Epoch 0:  66%|██████▌   | 3900/5888 [07:12<03:40,  9.01it/s, loss=1.14, v_num=1, val_loss_epoch=0.720, train_loss=0.859] #015Ep

[34m#015Validating:  29%|██▉       | 90/313 [00:08<00:21, 10.50it/s]#033[A#015Epoch 0:  96%|█████████▋| 5670/5888 [10:27<00:24,  9.04it/s, loss=0.83, v_num=1, val_loss_epoch=0.720, train_loss=0.988][0m
[34m#015Validating:  38%|███▊      | 120/313 [00:11<00:18, 10.55it/s]#033[A#015Epoch 0:  97%|█████████▋| 5700/5888 [10:30<00:20,  9.05it/s, loss=0.83, v_num=1, val_loss_epoch=0.720, train_loss=0.988][0m
[34m#015Validating:  48%|████▊     | 150/313 [00:14<00:15, 10.44it/s]#033[A#015Epoch 0:  97%|█████████▋| 5730/5888 [10:33<00:17,  9.05it/s, loss=0.83, v_num=1, val_loss_epoch=0.720, train_loss=0.988][0m
[34m#015Validating:  58%|█████▊    | 180/313 [00:17<00:12, 10.27it/s]#033[A#015Epoch 0:  98%|█████████▊| 5760/5888 [10:36<00:14,  9.06it/s, loss=0.83, v_num=1, val_loss_epoch=0.720, train_loss=0.988][0m
[34m#015Validating:  67%|██████▋   | 210/313 [00:20<00:10, 10.10it/s]#033[A#015Epoch 0:  98%|█████████▊| 5790/5888 [10:39<00:10,  9.06it/s, loss=0.83, v_num=1, val_loss_epoch=0.720

[34ms_epoch=0.187, train_loss=0.876, val_loss_step=0.048]#015Epoch 1:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.784, v_num=1, val_loss_epoch=0.187, train_loss=0.876, val_loss_step=0.048]#015Epoch 1:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.824, v_num=1, val_loss_epoch=0.187, train_loss=1.520, val_loss_step=0.048]#015Epoch 1:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.824, v_num=1, val_loss_epoch=0.187, train_loss=1.520, val_loss_step=0.048]#015Epoch 1:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.834, v_num=1, val_loss_epoch=0.187, train_loss=0.524, val_loss_step=0.048]#015Epoch 1:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.02it/s, loss=0.834, v_num=1, val_loss_epoch=0.187, train_loss=0.524, val_loss_step=0.048]#015Epoch 1:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.02it/s, loss=0.765, v_num=1, val_loss_epoch=0.187, train_loss=0.566, val_loss_step=0.048]#015Epoch 1:  58%|█████▊    | 3390/5888 [06:15<04:36,  9.02it/s, l

[34m0.187, train_loss=0.445, val_loss_step=0.048]#015Epoch 1:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.03it/s, loss=0.684, v_num=1, val_loss_epoch=0.187, train_loss=0.445, val_loss_step=0.048]#015Epoch 1:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.03it/s, loss=0.804, v_num=1, val_loss_epoch=0.187, train_loss=1.170, val_loss_step=0.048]#015Epoch 1:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.03it/s, loss=0.804, v_num=1, val_loss_epoch=0.187, train_loss=1.170, val_loss_step=0.048]#015Epoch 1:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.03it/s, loss=0.822, v_num=1, val_loss_epoch=0.187, train_loss=0.875, val_loss_step=0.048]#015Epoch 1:  84%|████████▎ | 4920/5888 [09:04<01:47,  9.03it/s, loss=0.822, v_num=1, val_loss_epoch=0.187, train_loss=0.875, val_loss_step=0.048]#015Epoch 1:  84%|████████▎ | 4920/5888 [09:04<01:47,  9.03it/s, loss=0.694, v_num=1, val_loss_epoch=0.187, train_loss=0.851, val_loss_step=0.048]#015Epoch 1:  84%|████████▍ | 4950/5888 [09:08<01:43,  9.03it/s, loss=0.69

[34m#015Validating:  48%|████▊     | 150/313 [00:14<00:15, 10.55it/s]#033[A#015Epoch 1:  97%|█████████▋| 5730/5888 [10:32<00:17,  9.06it/s, loss=0.718, v_num=1, val_loss_epoch=0.187, train_loss=0.442, val_loss_step=0.048][0m
[34m#015Validating:  58%|█████▊    | 180/313 [00:17<00:12, 10.39it/s]#033[A#015Epoch 1:  98%|█████████▊| 5760/5888 [10:35<00:14,  9.07it/s, loss=0.718, v_num=1, val_loss_epoch=0.187, train_loss=0.442, val_loss_step=0.048][0m
[34m#015Validating:  67%|██████▋   | 210/313 [00:20<00:10, 10.22it/s]#033[A#015Epoch 1:  98%|█████████▊| 5790/5888 [10:38<00:10,  9.07it/s, loss=0.718, v_num=1, val_loss_epoch=0.187, train_loss=0.442, val_loss_step=0.048][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:23<00:07, 10.28it/s]#033[A#015Epoch 1:  99%|█████████▉| 5820/5888 [10:41<00:07,  9.08it/s, loss=0.718, v_num=1, val_loss_epoch=0.187, train_loss=0.442, val_loss_step=0.048][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.39it/s]#033[A#015Epoch 1:  99

[34m   | 1650/5888 [03:03<07:50,  9.00it/s, loss=0.782, v_num=1, val_loss_epoch=0.169, train_loss=0.576, val_loss_step=0.0435]#015Epoch 2:  29%|██▊       | 1680/5888 [03:06<07:47,  9.00it/s, loss=0.782, v_num=1, val_loss_epoch=0.169, train_loss=0.576, val_loss_step=0.0435]#015Epoch 2:  29%|██▊       | 1680/5888 [03:06<07:47,  9.00it/s, loss=0.622, v_num=1, val_loss_epoch=0.169, train_loss=0.856, val_loss_step=0.0435]#015Epoch 2:  29%|██▉       | 1710/5888 [03:09<07:44,  9.00it/s, loss=0.622, v_num=1, val_loss_epoch=0.169, train_loss=0.856, val_loss_step=0.0435]#015Epoch 2:  29%|██▉       | 1710/5888 [03:09<07:44,  9.00it/s, loss=0.602, v_num=1, val_loss_epoch=0.169, train_loss=0.449, val_loss_step=0.0435]#015Epoch 2:  30%|██▉       | 1740/5888 [03:13<07:40,  9.00it/s, loss=0.602, v_num=1, val_loss_epoch=0.169, train_loss=0.449, val_loss_step=0.0435]#015Epoch 2:  30%|██▉       | 1740/5888 [03:13<07:40,  9.00it/s, loss=0.719, v_num=1, val_loss_epoch=0.169, train_loss=0.355, val_loss_ste

[34m�    | 3270/5888 [06:02<04:50,  9.01it/s, loss=0.722, v_num=1, val_loss_epoch=0.169, train_loss=0.917, val_loss_step=0.0435]#015Epoch 2:  56%|█████▌    | 3270/5888 [06:02<04:50,  9.01it/s, loss=0.736, v_num=1, val_loss_epoch=0.169, train_loss=0.536, val_loss_step=0.0435]#015Epoch 2:  56%|█████▌    | 3300/5888 [06:06<04:47,  9.01it/s, loss=0.736, v_num=1, val_loss_epoch=0.169, train_loss=0.536, val_loss_step=0.0435]#015Epoch 2:  56%|█████▌    | 3300/5888 [06:06<04:47,  9.01it/s, loss=0.775, v_num=1, val_loss_epoch=0.169, train_loss=0.709, val_loss_step=0.0435]#015Epoch 2:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.01it/s, loss=0.775, v_num=1, val_loss_epoch=0.169, train_loss=0.709, val_loss_step=0.0435]#015Epoch 2:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.01it/s, loss=0.737, v_num=1, val_loss_epoch=0.169, train_loss=0.455, val_loss_step=0.0435]#015Epoch 2:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.01it/s, loss=0.737, v_num=1, val_loss_epoch=0.169, train_loss=0.455, val_loss_s

[34mepoch=0.169, train_loss=0.349, val_loss_step=0.0435]#015Epoch 2:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.654, v_num=1, val_loss_epoch=0.169, train_loss=0.349, val_loss_step=0.0435]#015Epoch 2:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.731, v_num=1, val_loss_epoch=0.169, train_loss=0.930, val_loss_step=0.0435]#015Epoch 2:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.731, v_num=1, val_loss_epoch=0.169, train_loss=0.930, val_loss_step=0.0435]#015Epoch 2:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.633, v_num=1, val_loss_epoch=0.169, train_loss=0.345, val_loss_step=0.0435]#015Epoch 2:  83%|████████▎ | 4890/5888 [09:02<01:50,  9.02it/s, loss=0.633, v_num=1, val_loss_epoch=0.169, train_loss=0.345, val_loss_step=0.0435]#015Epoch 2:  83%|████████▎ | 4890/5888 [09:02<01:50,  9.02it/s, loss=0.675, v_num=1, val_loss_epoch=0.169, train_loss=0.656, val_loss_step=0.0435]#015Epoch 2:  84%|████████▎ | 4920/5888 [09:05<01:47,  9.02it

[34m#015Validating:  48%|████▊     | 150/313 [00:14<00:15, 10.56it/s]#033[A#015Epoch 2:  97%|█████████▋| 5730/5888 [10:32<00:17,  9.06it/s, loss=0.64, v_num=1, val_loss_epoch=0.169, train_loss=0.556, val_loss_step=0.0435][0m
[34m#015Validating:  58%|█████▊    | 180/313 [00:17<00:12, 10.40it/s]#033[A#015Epoch 2:  98%|█████████▊| 5760/5888 [10:35<00:14,  9.07it/s, loss=0.64, v_num=1, val_loss_epoch=0.169, train_loss=0.556, val_loss_step=0.0435][0m
[34m#015Validating:  67%|██████▋   | 210/313 [00:20<00:10, 10.23it/s]#033[A#015Epoch 2:  98%|█████████▊| 5790/5888 [10:38<00:10,  9.07it/s, loss=0.64, v_num=1, val_loss_epoch=0.169, train_loss=0.556, val_loss_step=0.0435][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:23<00:07, 10.29it/s]#033[A#015Epoch 2:  99%|█████████▉| 5820/5888 [10:41<00:07,  9.08it/s, loss=0.64, v_num=1, val_loss_epoch=0.169, train_loss=0.556, val_loss_step=0.0435][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.40it/s]#033[A#015Epoch 2:  99

[34m 1650/5888 [03:02<07:49,  9.03it/s, loss=0.591, v_num=1, val_loss_epoch=0.165, train_loss=0.774, val_loss_step=0.0282]#015Epoch 3:  29%|██▊       | 1680/5888 [03:06<07:46,  9.02it/s, loss=0.591, v_num=1, val_loss_epoch=0.165, train_loss=0.774, val_loss_step=0.0282]#015Epoch 3:  29%|██▊       | 1680/5888 [03:06<07:46,  9.02it/s, loss=0.67, v_num=1, val_loss_epoch=0.165, train_loss=0.344, val_loss_step=0.0282] #015Epoch 3:  29%|██▉       | 1710/5888 [03:09<07:42,  9.03it/s, loss=0.67, v_num=1, val_loss_epoch=0.165, train_loss=0.344, val_loss_step=0.0282]#015Epoch 3:  29%|██▉       | 1710/5888 [03:09<07:42,  9.03it/s, loss=0.622, v_num=1, val_loss_epoch=0.165, train_loss=0.452, val_loss_step=0.0282]#015Epoch 3:  30%|██▉       | 1740/5888 [03:12<07:39,  9.03it/s, loss=0.622, v_num=1, val_loss_epoch=0.165, train_loss=0.452, val_loss_step=0.0282]#015Epoch 3:  30%|██▉       | 1740/5888 [03:12<07:39,  9.03it/s, loss=0.638, v_num=1, val_loss_epoch=0.165, train_loss=0.433, val_loss_step=0.0

[34m5888 [06:02<04:50,  9.02it/s, loss=0.54, v_num=1, val_loss_epoch=0.165, train_loss=0.465, val_loss_step=0.0282]#015Epoch 3:  56%|█████▌    | 3270/5888 [06:02<04:50,  9.02it/s, loss=0.607, v_num=1, val_loss_epoch=0.165, train_loss=0.375, val_loss_step=0.0282]#015Epoch 3:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.607, v_num=1, val_loss_epoch=0.165, train_loss=0.375, val_loss_step=0.0282]#015Epoch 3:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.572, v_num=1, val_loss_epoch=0.165, train_loss=0.560, val_loss_step=0.0282]#015Epoch 3:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.572, v_num=1, val_loss_epoch=0.165, train_loss=0.560, val_loss_step=0.0282]#015Epoch 3:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.648, v_num=1, val_loss_epoch=0.165, train_loss=0.745, val_loss_step=0.0282]#015Epoch 3:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.02it/s, loss=0.648, v_num=1, val_loss_epoch=0.165, train_loss=0.745, val_loss_step=0.0282]#0

[34mtrain_loss=1.010, val_loss_step=0.0282]#015Epoch 3:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.682, v_num=1, val_loss_epoch=0.165, train_loss=1.010, val_loss_step=0.0282]#015Epoch 3:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.557, v_num=1, val_loss_epoch=0.165, train_loss=1.060, val_loss_step=0.0282]#015Epoch 3:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.03it/s, loss=0.557, v_num=1, val_loss_epoch=0.165, train_loss=1.060, val_loss_step=0.0282]#015Epoch 3:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.03it/s, loss=0.637, v_num=1, val_loss_epoch=0.165, train_loss=0.536, val_loss_step=0.0282]#015Epoch 3:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.02it/s, loss=0.637, v_num=1, val_loss_epoch=0.165, train_loss=0.536, val_loss_step=0.0282]#015Epoch 3:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.02it/s, loss=0.564, v_num=1, val_loss_epoch=0.165, train_loss=0.451, val_loss_step=0.0282]#015Epoch 3:  84%|████████▎ | 4920/5888 [09:05<01:47,  9.03it/s, loss=0.56

[34m#015Validating:  67%|██████▋   | 210/313 [00:20<00:10, 10.17it/s]#033[A#015Epoch 3:  98%|█████████▊| 5790/5888 [10:38<00:10,  9.07it/s, loss=0.579, v_num=1, val_loss_epoch=0.165, train_loss=0.565, val_loss_step=0.0282][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:23<00:07, 10.24it/s]#033[A#015Epoch 3:  99%|█████████▉| 5820/5888 [10:41<00:07,  9.07it/s, loss=0.579, v_num=1, val_loss_epoch=0.165, train_loss=0.565, val_loss_step=0.0282][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:26<00:04, 10.37it/s]#033[A#015Epoch 3:  99%|█████████▉| 5850/5888 [10:44<00:04,  9.08it/s, loss=0.579, v_num=1, val_loss_epoch=0.165, train_loss=0.565, val_loss_step=0.0282][0m
[34m#015Validating:  96%|█████████▌| 300/313 [00:28<00:01, 10.53it/s]#033[A#015Epoch 3: 100%|█████████▉| 5880/5888 [10:46<00:00,  9.09it/s, loss=0.579, v_num=1, val_loss_epoch=0.165, train_loss=0.565, val_loss_step=0.0282][0m
[34m#015Validating: 100%|██████████| 313/313 [00:30<00:00, 10.63it/s]#033[A#015Epoch 3:

[34m  | 1650/5888 [03:03<07:50,  9.01it/s, loss=0.615, v_num=1, val_loss_epoch=0.165, train_loss=0.914, val_loss_step=0.0524]#015Epoch 4:  29%|██▊       | 1680/5888 [03:06<07:46,  9.02it/s, loss=0.615, v_num=1, val_loss_epoch=0.165, train_loss=0.914, val_loss_step=0.0524]#015Epoch 4:  29%|██▊       | 1680/5888 [03:06<07:46,  9.02it/s, loss=0.516, v_num=1, val_loss_epoch=0.165, train_loss=0.456, val_loss_step=0.0524]#015Epoch 4:  29%|██▉       | 1710/5888 [03:09<07:43,  9.02it/s, loss=0.516, v_num=1, val_loss_epoch=0.165, train_loss=0.456, val_loss_step=0.0524]#015Epoch 4:  29%|██▉       | 1710/5888 [03:09<07:43,  9.02it/s, loss=0.522, v_num=1, val_loss_epoch=0.165, train_loss=0.407, val_loss_step=0.0524]#015Epoch 4:  30%|██▉       | 1740/5888 [03:12<07:39,  9.02it/s, loss=0.522, v_num=1, val_loss_epoch=0.165, train_loss=0.407, val_loss_step=0.0524]#015Epoch 4:  30%|██▉       | 1740/5888 [03:12<07:39,  9.02it/s, loss=0.576, v_num=1, val_loss_epoch=0.165, train_loss=0.590, val_loss_step

[34m 3270/5888 [06:02<04:50,  9.03it/s, loss=0.557, v_num=1, val_loss_epoch=0.165, train_loss=1.240, val_loss_step=0.0524]#015Epoch 4:  56%|█████▌    | 3270/5888 [06:02<04:50,  9.03it/s, loss=0.633, v_num=1, val_loss_epoch=0.165, train_loss=1.650, val_loss_step=0.0524]#015Epoch 4:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.633, v_num=1, val_loss_epoch=0.165, train_loss=1.650, val_loss_step=0.0524]#015Epoch 4:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.61, v_num=1, val_loss_epoch=0.165, train_loss=0.502, val_loss_step=0.0524] #015Epoch 4:  57%|█████▋    | 3330/5888 [06:08<04:43,  9.03it/s, loss=0.61, v_num=1, val_loss_epoch=0.165, train_loss=0.502, val_loss_step=0.0524]#015Epoch 4:  57%|█████▋    | 3330/5888 [06:08<04:43,  9.03it/s, loss=0.531, v_num=1, val_loss_epoch=0.165, train_loss=0.333, val_loss_step=0.0524]#015Epoch 4:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.02it/s, loss=0.531, v_num=1, val_loss_epoch=0.165, train_loss=0.333, val_loss_step=0.0

[34m.165, train_loss=0.362, val_loss_step=0.0524]#015Epoch 4:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.512, v_num=1, val_loss_epoch=0.165, train_loss=0.362, val_loss_step=0.0524]#015Epoch 4:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.667, v_num=1, val_loss_epoch=0.165, train_loss=0.352, val_loss_step=0.0524]#015Epoch 4:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.667, v_num=1, val_loss_epoch=0.165, train_loss=0.352, val_loss_step=0.0524]#015Epoch 4:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.488, v_num=1, val_loss_epoch=0.165, train_loss=0.266, val_loss_step=0.0524]#015Epoch 4:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.02it/s, loss=0.488, v_num=1, val_loss_epoch=0.165, train_loss=0.266, val_loss_step=0.0524]#015Epoch 4:  83%|████████▎ | 4890/5888 [09:01<01:50,  9.02it/s, loss=0.472, v_num=1, val_loss_epoch=0.165, train_loss=0.430, val_loss_step=0.0524]#015Epoch 4:  84%|████████▎ | 4920/5888 [09:05<01:47,  9.02it/s, los

[34m#015Validating:  67%|██████▋   | 210/313 [00:20<00:10, 10.20it/s]#033[A#015Epoch 4:  98%|█████████▊| 5790/5888 [10:38<00:10,  9.07it/s, loss=0.459, v_num=1, val_loss_epoch=0.165, train_loss=0.412, val_loss_step=0.0524][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:23<00:07, 10.20it/s]#033[A#015Epoch 4:  99%|█████████▉| 5820/5888 [10:41<00:07,  9.08it/s, loss=0.459, v_num=1, val_loss_epoch=0.165, train_loss=0.412, val_loss_step=0.0524][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.36it/s]#033[A#015Epoch 4:  99%|█████████▉| 5850/5888 [10:44<00:04,  9.08it/s, loss=0.459, v_num=1, val_loss_epoch=0.165, train_loss=0.412, val_loss_step=0.0524][0m
[34m#015Validating:  96%|█████████▌| 300/313 [00:28<00:01, 10.52it/s]#033[A#015Epoch 4: 100%|█████████▉| 5880/5888 [10:46<00:00,  9.09it/s, loss=0.459, v_num=1, val_loss_epoch=0.165, train_loss=0.412, val_loss_step=0.0524][0m
[34m#015Validating: 100%|██████████| 313/313 [00:29<00:00, 10.62it/s]#033[A#015Epoch 4:

[34m | 1650/5888 [03:03<07:50,  9.01it/s, loss=0.59, v_num=1, val_loss_epoch=0.163, train_loss=0.406, val_loss_step=0.0206]#015Epoch 5:  29%|██▊       | 1680/5888 [03:06<07:47,  9.01it/s, loss=0.59, v_num=1, val_loss_epoch=0.163, train_loss=0.406, val_loss_step=0.0206]#015Epoch 5:  29%|██▊       | 1680/5888 [03:06<07:47,  9.01it/s, loss=0.586, v_num=1, val_loss_epoch=0.163, train_loss=0.445, val_loss_step=0.0206]#015Epoch 5:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.586, v_num=1, val_loss_epoch=0.163, train_loss=0.445, val_loss_step=0.0206]#015Epoch 5:  29%|██▉       | 1710/5888 [03:09<07:43,  9.01it/s, loss=0.478, v_num=1, val_loss_epoch=0.163, train_loss=0.513, val_loss_step=0.0206]#015Epoch 5:  30%|██▉       | 1740/5888 [03:13<07:40,  9.01it/s, loss=0.478, v_num=1, val_loss_epoch=0.163, train_loss=0.513, val_loss_step=0.0206]#015Epoch 5:  30%|██▉       | 1740/5888 [03:13<07:40,  9.01it/s, loss=0.533, v_num=1, val_loss_epoch=0.163, train_loss=0.379, val_loss_step=0.

[34m| 3270/5888 [06:02<04:50,  9.02it/s, loss=0.502, v_num=1, val_loss_epoch=0.163, train_loss=0.345, val_loss_step=0.0206]#015Epoch 5:  56%|█████▌    | 3270/5888 [06:02<04:50,  9.02it/s, loss=0.487, v_num=1, val_loss_epoch=0.163, train_loss=0.387, val_loss_step=0.0206]#015Epoch 5:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.487, v_num=1, val_loss_epoch=0.163, train_loss=0.387, val_loss_step=0.0206]#015Epoch 5:  56%|█████▌    | 3300/5888 [06:05<04:46,  9.02it/s, loss=0.54, v_num=1, val_loss_epoch=0.163, train_loss=0.276, val_loss_step=0.0206] #015Epoch 5:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.54, v_num=1, val_loss_epoch=0.163, train_loss=0.276, val_loss_step=0.0206]#015Epoch 5:  57%|█████▋    | 3330/5888 [06:09<04:43,  9.02it/s, loss=0.552, v_num=1, val_loss_epoch=0.163, train_loss=0.260, val_loss_step=0.0206]#015Epoch 5:  57%|█████▋    | 3360/5888 [06:12<04:40,  9.02it/s, loss=0.552, v_num=1, val_loss_epoch=0.163, train_loss=0.260, val_loss_step=0.

[34m163, train_loss=0.325, val_loss_step=0.0206]#015Epoch 5:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.469, v_num=1, val_loss_epoch=0.163, train_loss=0.325, val_loss_step=0.0206]#015Epoch 5:  82%|████████▏ | 4830/5888 [08:55<01:57,  9.02it/s, loss=0.516, v_num=1, val_loss_epoch=0.163, train_loss=0.817, val_loss_step=0.0206]#015Epoch 5:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.516, v_num=1, val_loss_epoch=0.163, train_loss=0.817, val_loss_step=0.0206]#015Epoch 5:  83%|████████▎ | 4860/5888 [08:58<01:53,  9.02it/s, loss=0.488, v_num=1, val_loss_epoch=0.163, train_loss=0.532, val_loss_step=0.0206]#015Epoch 5:  83%|████████▎ | 4890/5888 [09:02<01:50,  9.02it/s, loss=0.488, v_num=1, val_loss_epoch=0.163, train_loss=0.532, val_loss_step=0.0206]#015Epoch 5:  83%|████████▎ | 4890/5888 [09:02<01:50,  9.02it/s, loss=0.534, v_num=1, val_loss_epoch=0.163, train_loss=0.412, val_loss_step=0.0206]#015Epoch 5:  84%|████████▎ | 4920/5888 [09:05<01:47,  9.02it/s, loss

[34m#015Validating:  48%|████▊     | 150/313 [00:14<00:15, 10.57it/s]#033[A#015Epoch 5:  97%|█████████▋| 5730/5888 [10:32<00:17,  9.06it/s, loss=0.412, v_num=1, val_loss_epoch=0.163, train_loss=0.350, val_loss_step=0.0206][0m
[34m#015Validating:  58%|█████▊    | 180/313 [00:17<00:12, 10.41it/s]#033[A#015Epoch 5:  98%|█████████▊| 5760/5888 [10:35<00:14,  9.07it/s, loss=0.412, v_num=1, val_loss_epoch=0.163, train_loss=0.350, val_loss_step=0.0206][0m
[34m#015Validating:  67%|██████▋   | 210/313 [00:20<00:10, 10.25it/s]#033[A#015Epoch 5:  98%|█████████▊| 5790/5888 [10:38<00:10,  9.07it/s, loss=0.412, v_num=1, val_loss_epoch=0.163, train_loss=0.350, val_loss_step=0.0206][0m
[34m#015Validating:  77%|███████▋  | 240/313 [00:23<00:07, 10.31it/s]#033[A#015Epoch 5:  99%|█████████▉| 5820/5888 [10:41<00:07,  9.08it/s, loss=0.412, v_num=1, val_loss_epoch=0.163, train_loss=0.350, val_loss_step=0.0206][0m
[34m#015Validating:  86%|████████▋ | 270/313 [00:25<00:04, 10.43it/s]#033[A#015Epoch 5:

[34m[03:35<09:23,  9.03it/s, loss=1.56, v_num=0, val_loss_epoch=0.716, train_loss=0.904]#015Epoch 0:  28%|██▊       | 1950/7036 [03:35<09:23,  9.03it/s, loss=1.88, v_num=0, val_loss_epoch=0.716, train_loss=1.550]#015Epoch 0:  28%|██▊       | 1980/7036 [03:39<09:19,  9.03it/s, loss=1.88, v_num=0, val_loss_epoch=0.716, train_loss=1.550]#015Epoch 0:  28%|██▊       | 1980/7036 [03:39<09:19,  9.03it/s, loss=1.21, v_num=0, val_loss_epoch=0.716, train_loss=1.690]#015Epoch 0:  29%|██▊       | 2010/7036 [03:42<09:16,  9.03it/s, loss=1.21, v_num=0, val_loss_epoch=0.716, train_loss=1.690]#015Epoch 0:  29%|██▊       | 2010/7036 [03:42<09:16,  9.03it/s, loss=1.5, v_num=0, val_loss_epoch=0.716, train_loss=5.240] #015Epoch 0:  29%|██▉       | 2040/7036 [03:45<09:12,  9.03it/s, loss=1.5, v_num=0, val_loss_epoch=0.716, train_loss=5.240]#015Epoch 0:  29%|██▉       | 2040/7036 [03:45<09:12,  9.03it/s, loss=1.71, v_num=0, val_loss_epoch=0.716, train_loss=1.060]#015Epoch 0:  29%|██▉       | 2070/7036 [03:

[34m.03it/s, loss=1.34, v_num=0, val_loss_epoch=0.716, train_loss=1.060]#015Epoch 0:  55%|█████▍    | 3840/7036 [07:05<05:53,  9.03it/s, loss=1.8, v_num=0, val_loss_epoch=0.716, train_loss=0.953] #015Epoch 0:  55%|█████▌    | 3870/7036 [07:08<05:50,  9.03it/s, loss=1.8, v_num=0, val_loss_epoch=0.716, train_loss=0.953]#015Epoch 0:  55%|█████▌    | 3870/7036 [07:08<05:50,  9.03it/s, loss=1.59, v_num=0, val_loss_epoch=0.716, train_loss=3.750]#015Epoch 0:  55%|█████▌    | 3900/7036 [07:12<05:47,  9.03it/s, loss=1.59, v_num=0, val_loss_epoch=0.716, train_loss=3.750]#015Epoch 0:  55%|█████▌    | 3900/7036 [07:12<05:47,  9.03it/s, loss=1.19, v_num=0, val_loss_epoch=0.716, train_loss=0.989]#015Epoch 0:  56%|█████▌    | 3930/7036 [07:15<05:44,  9.03it/s, loss=1.19, v_num=0, val_loss_epoch=0.716, train_loss=0.989]#015Epoch 0:  56%|█████▌    | 3930/7036 [07:15<05:44,  9.03it/s, loss=1.58, v_num=0, val_loss_epoch=0.716, train_loss=1.130]#015Epoch 0:  56%|█████▋    | 3960/7036 [07:18<05:40,  9.03i

[34mch=0.716, train_loss=5.400]#015Epoch 0:  81%|████████  | 5670/7036 [10:27<02:31,  9.03it/s, loss=1.28, v_num=0, val_loss_epoch=0.716, train_loss=5.400]#015Epoch 0:  81%|████████  | 5670/7036 [10:27<02:31,  9.03it/s, loss=1.54, v_num=0, val_loss_epoch=0.716, train_loss=1.440]#015Epoch 0:  81%|████████  | 5700/7036 [10:31<02:27,  9.03it/s, loss=1.54, v_num=0, val_loss_epoch=0.716, train_loss=1.440]#015Epoch 0:  81%|████████  | 5700/7036 [10:31<02:27,  9.03it/s, loss=1.06, v_num=0, val_loss_epoch=0.716, train_loss=1.030]#015Epoch 0:  81%|████████▏ | 5730/7036 [10:34<02:24,  9.03it/s, loss=1.06, v_num=0, val_loss_epoch=0.716, train_loss=1.030]#015Epoch 0:  81%|████████▏ | 5730/7036 [10:34<02:24,  9.03it/s, loss=1.02, v_num=0, val_loss_epoch=0.716, train_loss=1.070]#015Epoch 0:  82%|████████▏ | 5760/7036 [10:37<02:21,  9.03it/s, loss=1.02, v_num=0, val_loss_epoch=0.716, train_loss=1.070]#015Epoch 0:  82%|████████▏ | 5760/7036 [10:37<02:21,  9.03it/s, loss=1.46, v_num=0, val_loss_epoch=

[34m#015Validating:  23%|██▎       | 90/383 [00:08<00:27, 10.55it/s]#033[A#015Epoch 0:  96%|█████████▌| 6750/7036 [12:25<00:31,  9.05it/s, loss=1.42, v_num=0, val_loss_epoch=0.716, train_loss=1.160][0m
[34m#015Validating:  31%|███▏      | 120/383 [00:11<00:24, 10.64it/s]#033[A#015Epoch 0:  96%|█████████▋| 6780/7036 [12:28<00:28,  9.06it/s, loss=1.42, v_num=0, val_loss_epoch=0.716, train_loss=1.160][0m
[34m#015Validating:  39%|███▉      | 150/383 [00:14<00:21, 10.63it/s]#033[A#015Epoch 0:  97%|█████████▋| 6810/7036 [12:31<00:24,  9.07it/s, loss=1.42, v_num=0, val_loss_epoch=0.716, train_loss=1.160][0m
[34m#015Validating:  47%|████▋     | 180/383 [00:17<00:19, 10.54it/s]#033[A#015Epoch 0:  97%|█████████▋| 6840/7036 [12:34<00:21,  9.07it/s, loss=1.42, v_num=0, val_loss_epoch=0.716, train_loss=1.160][0m
[34m#015Validating:  55%|█████▍    | 210/383 [00:20<00:16, 10.36it/s]#033[A#015Epoch 0:  98%|█████████▊| 6870/7036 [12:37<00:18,  9.07it/s, loss=1.42, v_num=0, val_loss_epoch=0.716

[34m6]#015Epoch 1:  24%|██▍       | 1680/7036 [03:07<09:56,  8.98it/s, loss=1.04, v_num=0, val_loss_epoch=0.228, train_loss=0.873, val_loss_step=0.296]#015Epoch 1:  24%|██▍       | 1710/7036 [03:10<09:53,  8.98it/s, loss=1.04, v_num=0, val_loss_epoch=0.228, train_loss=0.873, val_loss_step=0.296]#015Epoch 1:  24%|██▍       | 1710/7036 [03:10<09:53,  8.98it/s, loss=1.4, v_num=0, val_loss_epoch=0.228, train_loss=1.090, val_loss_step=0.296] #015Epoch 1:  25%|██▍       | 1740/7036 [03:13<09:49,  8.98it/s, loss=1.4, v_num=0, val_loss_epoch=0.228, train_loss=1.090, val_loss_step=0.296]#015Epoch 1:  25%|██▍       | 1740/7036 [03:13<09:49,  8.98it/s, loss=1.16, v_num=0, val_loss_epoch=0.228, train_loss=0.741, val_loss_step=0.296]#015Epoch 1:  25%|██▌       | 1770/7036 [03:17<09:46,  8.98it/s, loss=1.16, v_num=0, val_loss_epoch=0.228, train_loss=0.741, val_loss_step=0.296]#015Epoch 1:  25%|██▌       | 1770/7036 [03:17<09:46,  8.98it/s, loss=1.01, v_num=0, val_loss_epoch=0.228, train_loss=1.270,

[34m06:09<06:51,  9.01it/s, loss=1.15, v_num=0, val_loss_epoch=0.228, train_loss=1.300, val_loss_step=0.296]#015Epoch 1:  47%|████▋     | 3330/7036 [06:09<06:51,  9.01it/s, loss=0.993, v_num=0, val_loss_epoch=0.228, train_loss=1.650, val_loss_step=0.296]#015Epoch 1:  48%|████▊     | 3360/7036 [06:12<06:48,  9.01it/s, loss=0.993, v_num=0, val_loss_epoch=0.228, train_loss=1.650, val_loss_step=0.296]#015Epoch 1:  48%|████▊     | 3360/7036 [06:12<06:48,  9.01it/s, loss=1.64, v_num=0, val_loss_epoch=0.228, train_loss=0.908, val_loss_step=0.296] #015Epoch 1:  48%|████▊     | 3390/7036 [06:16<06:44,  9.01it/s, loss=1.64, v_num=0, val_loss_epoch=0.228, train_loss=0.908, val_loss_step=0.296]#015Epoch 1:  48%|████▊     | 3390/7036 [06:16<06:44,  9.01it/s, loss=2.32, v_num=0, val_loss_epoch=0.228, train_loss=1.040, val_loss_step=0.296]#015Epoch 1:  49%|████▊     | 3420/7036 [06:19<06:41,  9.01it/s, loss=2.32, v_num=0, val_loss_epoch=0.228, train_loss=1.040, val_loss_step=0.296]#015Epoch 1:  49%|

[34m 9.01it/s, loss=1.87, v_num=0, val_loss_epoch=0.228, train_loss=0.812, val_loss_step=0.296]#015Epoch 1:  70%|██████▉   | 4920/7036 [09:06<03:54,  9.01it/s, loss=1, v_num=0, val_loss_epoch=0.228, train_loss=1.370, val_loss_step=0.296]   #015Epoch 1:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=1, v_num=0, val_loss_epoch=0.228, train_loss=1.370, val_loss_step=0.296]#015Epoch 1:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=1.72, v_num=0, val_loss_epoch=0.228, train_loss=0.663, val_loss_step=0.296]#015Epoch 1:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=1.72, v_num=0, val_loss_epoch=0.228, train_loss=0.663, val_loss_step=0.296]#015Epoch 1:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=1.24, v_num=0, val_loss_epoch=0.228, train_loss=1.270, val_loss_step=0.296]#015Epoch 1:  71%|███████   | 5010/7036 [09:16<03:44,  9.01it/s, loss=1.24, v_num=0, val_loss_epoch=0.228, train_loss=1.270, val_loss_step=0.296]#015Epoch 1:  71%|███████   | 5010/70

[34moss=1.36, v_num=0, val_loss_epoch=0.228, train_loss=2.580, val_loss_step=0.296]#015Epoch 1:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.01it/s, loss=1.36, v_num=0, val_loss_epoch=0.228, train_loss=2.580, val_loss_step=0.296]#015Epoch 1:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.01it/s, loss=1.17, v_num=0, val_loss_epoch=0.228, train_loss=1.060, val_loss_step=0.296]#015Epoch 1:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.01it/s, loss=1.17, v_num=0, val_loss_epoch=0.228, train_loss=1.060, val_loss_step=0.296]#015Epoch 1:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.01it/s, loss=1.16, v_num=0, val_loss_epoch=0.228, train_loss=2.100, val_loss_step=0.296]#015Epoch 1:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.02it/s, loss=1.16, v_num=0, val_loss_epoch=0.228, train_loss=2.100, val_loss_step=0.296]#015Epoch 1:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.02it/s, loss=1.24, v_num=0, val_loss_epoch=0.228, train_loss=0.835, val_loss_step=0.296]#015Epoch 1:  93%|█████████▎| 6570/7036 [12:08

[34m=0.217, train_loss=0.854, val_loss_step=0.341]#015Epoch 2:  24%|██▍       | 1680/7036 [03:06<09:54,  9.01it/s, loss=1.32, v_num=0, val_loss_epoch=0.217, train_loss=2.640, val_loss_step=0.341]#015Epoch 2:  24%|██▍       | 1710/7036 [03:09<09:51,  9.01it/s, loss=1.32, v_num=0, val_loss_epoch=0.217, train_loss=2.640, val_loss_step=0.341]#015Epoch 2:  24%|██▍       | 1710/7036 [03:09<09:51,  9.01it/s, loss=0.964, v_num=0, val_loss_epoch=0.217, train_loss=0.811, val_loss_step=0.341]#015Epoch 2:  25%|██▍       | 1740/7036 [03:13<09:47,  9.01it/s, loss=0.964, v_num=0, val_loss_epoch=0.217, train_loss=0.811, val_loss_step=0.341]#015Epoch 2:  25%|██▍       | 1740/7036 [03:13<09:47,  9.01it/s, loss=1.12, v_num=0, val_loss_epoch=0.217, train_loss=0.737, val_loss_step=0.341] #015Epoch 2:  25%|██▌       | 1770/7036 [03:16<09:44,  9.01it/s, loss=1.12, v_num=0, val_loss_epoch=0.217, train_loss=0.737, val_loss_step=0.341]#015Epoch 2:  25%|██▌       | 1770/7036 [03:16<09:44,  9.01it/s, loss=1.18, 

[34mtrain_loss=1.190, val_loss_step=0.341]#015Epoch 2:  47%|████▋     | 3330/7036 [06:09<06:51,  9.01it/s, loss=1.25, v_num=0, val_loss_epoch=0.217, train_loss=1.190, val_loss_step=0.341]#015Epoch 2:  47%|████▋     | 3330/7036 [06:09<06:51,  9.01it/s, loss=1.18, v_num=0, val_loss_epoch=0.217, train_loss=3.430, val_loss_step=0.341]#015Epoch 2:  48%|████▊     | 3360/7036 [06:12<06:47,  9.01it/s, loss=1.18, v_num=0, val_loss_epoch=0.217, train_loss=3.430, val_loss_step=0.341]#015Epoch 2:  48%|████▊     | 3360/7036 [06:12<06:47,  9.01it/s, loss=1.43, v_num=0, val_loss_epoch=0.217, train_loss=3.090, val_loss_step=0.341]#015Epoch 2:  48%|████▊     | 3390/7036 [06:16<06:44,  9.01it/s, loss=1.43, v_num=0, val_loss_epoch=0.217, train_loss=3.090, val_loss_step=0.341]#015Epoch 2:  48%|████▊     | 3390/7036 [06:16<06:44,  9.01it/s, loss=1.18, v_num=0, val_loss_epoch=0.217, train_loss=1.080, val_loss_step=0.341]#015Epoch 2:  49%|████▊     | 3420/7036 [06:19<06:41,  9.01it/s, loss=1.18, v_num=0, va

[34mpoch=0.217, train_loss=1.160, val_loss_step=0.341] #015Epoch 2:  70%|██████▉   | 4920/7036 [09:05<03:54,  9.02it/s, loss=1.13, v_num=0, val_loss_epoch=0.217, train_loss=1.160, val_loss_step=0.341]#015Epoch 2:  70%|██████▉   | 4920/7036 [09:05<03:54,  9.02it/s, loss=1.08, v_num=0, val_loss_epoch=0.217, train_loss=0.916, val_loss_step=0.341]#015Epoch 2:  70%|███████   | 4950/7036 [09:08<03:51,  9.02it/s, loss=1.08, v_num=0, val_loss_epoch=0.217, train_loss=0.916, val_loss_step=0.341]#015Epoch 2:  70%|███████   | 4950/7036 [09:08<03:51,  9.02it/s, loss=1.01, v_num=0, val_loss_epoch=0.217, train_loss=0.701, val_loss_step=0.341]#015Epoch 2:  71%|███████   | 4980/7036 [09:12<03:47,  9.02it/s, loss=1.01, v_num=0, val_loss_epoch=0.217, train_loss=0.701, val_loss_step=0.341]#015Epoch 2:  71%|███████   | 4980/7036 [09:12<03:47,  9.02it/s, loss=1.04, v_num=0, val_loss_epoch=0.217, train_loss=0.614, val_loss_step=0.341]#015Epoch 2:  71%|███████   | 5010/7036 [09:15<03:44,  9.02it/s, loss=1.04

[34mum=0, val_loss_epoch=0.217, train_loss=0.641, val_loss_step=0.341]#015Epoch 2:  92%|█████████▏| 6450/7036 [11:55<01:04,  9.02it/s, loss=1.06, v_num=0, val_loss_epoch=0.217, train_loss=0.789, val_loss_step=0.341]#015Epoch 2:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.02it/s, loss=1.06, v_num=0, val_loss_epoch=0.217, train_loss=0.789, val_loss_step=0.341]#015Epoch 2:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.02it/s, loss=0.949, v_num=0, val_loss_epoch=0.217, train_loss=0.827, val_loss_step=0.341]#015Epoch 2:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.02it/s, loss=0.949, v_num=0, val_loss_epoch=0.217, train_loss=0.827, val_loss_step=0.341]#015Epoch 2:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.02it/s, loss=1.21, v_num=0, val_loss_epoch=0.217, train_loss=3.680, val_loss_step=0.341] #015Epoch 2:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.02it/s, loss=1.21, v_num=0, val_loss_epoch=0.217, train_loss=3.680, val_loss_step=0.341]#015Epoch 2:  93%|█████████▎| 6540/7036 [12:05<00:55,  9

[34mum=0, val_loss_epoch=0.207, train_loss=0.739, val_loss_step=0.268]#015Epoch 3:  24%|██▍       | 1680/7036 [03:06<09:54,  9.01it/s, loss=0.779, v_num=0, val_loss_epoch=0.207, train_loss=0.702, val_loss_step=0.268]#015Epoch 3:  24%|██▍       | 1710/7036 [03:09<09:51,  9.01it/s, loss=0.779, v_num=0, val_loss_epoch=0.207, train_loss=0.702, val_loss_step=0.268]#015Epoch 3:  24%|██▍       | 1710/7036 [03:09<09:51,  9.01it/s, loss=1.15, v_num=0, val_loss_epoch=0.207, train_loss=1.070, val_loss_step=0.268] #015Epoch 3:  25%|██▍       | 1740/7036 [03:13<09:47,  9.01it/s, loss=1.15, v_num=0, val_loss_epoch=0.207, train_loss=1.070, val_loss_step=0.268]#015Epoch 3:  25%|██▍       | 1740/7036 [03:13<09:47,  9.01it/s, loss=0.753, v_num=0, val_loss_epoch=0.207, train_loss=0.628, val_loss_step=0.268]#015Epoch 3:  25%|██▌       | 1770/7036 [03:16<09:44,  9.01it/s, loss=0.753, v_num=0, val_loss_epoch=0.207, train_loss=0.628, val_loss_step=0.268]#015Epoch 3:  25%|██▌       | 1770/7036 [03:16<09:44, 

[34m loss=0.895, v_num=0, val_loss_epoch=0.207, train_loss=0.770, val_loss_step=0.268]#015Epoch 3:  47%|████▋     | 3330/7036 [06:08<06:50,  9.03it/s, loss=0.895, v_num=0, val_loss_epoch=0.207, train_loss=0.770, val_loss_step=0.268]#015Epoch 3:  47%|████▋     | 3330/7036 [06:08<06:50,  9.03it/s, loss=0.784, v_num=0, val_loss_epoch=0.207, train_loss=1.240, val_loss_step=0.268]#015Epoch 3:  48%|████▊     | 3360/7036 [06:12<06:47,  9.03it/s, loss=0.784, v_num=0, val_loss_epoch=0.207, train_loss=1.240, val_loss_step=0.268]#015Epoch 3:  48%|████▊     | 3360/7036 [06:12<06:47,  9.03it/s, loss=0.836, v_num=0, val_loss_epoch=0.207, train_loss=0.715, val_loss_step=0.268]#015Epoch 3:  48%|████▊     | 3390/7036 [06:15<06:43,  9.03it/s, loss=0.836, v_num=0, val_loss_epoch=0.207, train_loss=0.715, val_loss_step=0.268]#015Epoch 3:  48%|████▊     | 3390/7036 [06:15<06:43,  9.03it/s, loss=0.741, v_num=0, val_loss_epoch=0.207, train_loss=0.505, val_loss_step=0.268]#015Epoch 3:  49%|████▊     | 3420/70

[34m��█▉   | 4890/7036 [09:01<03:57,  9.03it/s, loss=1.01, v_num=0, val_loss_epoch=0.207, train_loss=0.814, val_loss_step=0.268]#015Epoch 3:  70%|██████▉   | 4920/7036 [09:05<03:54,  9.03it/s, loss=1.01, v_num=0, val_loss_epoch=0.207, train_loss=0.814, val_loss_step=0.268]#015Epoch 3:  70%|██████▉   | 4920/7036 [09:05<03:54,  9.03it/s, loss=0.983, v_num=0, val_loss_epoch=0.207, train_loss=1.410, val_loss_step=0.268]#015Epoch 3:  70%|███████   | 4950/7036 [09:08<03:51,  9.03it/s, loss=0.983, v_num=0, val_loss_epoch=0.207, train_loss=1.410, val_loss_step=0.268]#015Epoch 3:  70%|███████   | 4950/7036 [09:08<03:51,  9.03it/s, loss=1.04, v_num=0, val_loss_epoch=0.207, train_loss=0.652, val_loss_step=0.268] #015Epoch 3:  71%|███████   | 4980/7036 [09:11<03:47,  9.03it/s, loss=1.04, v_num=0, val_loss_epoch=0.207, train_loss=0.652, val_loss_step=0.268]#015Epoch 3:  71%|███████   | 4980/7036 [09:11<03:47,  9.03it/s, loss=0.841, v_num=0, val_loss_epoch=0.207, train_loss=0.691, val_loss_step=0.2

[34mp=0.268]#015Epoch 3:  92%|█████████▏| 6450/7036 [11:54<01:04,  9.03it/s, loss=0.723, v_num=0, val_loss_epoch=0.207, train_loss=0.743, val_loss_step=0.268]#015Epoch 3:  92%|█████████▏| 6450/7036 [11:54<01:04,  9.03it/s, loss=0.972, v_num=0, val_loss_epoch=0.207, train_loss=0.882, val_loss_step=0.268]#015Epoch 3:  92%|█████████▏| 6480/7036 [11:57<01:01,  9.03it/s, loss=0.972, v_num=0, val_loss_epoch=0.207, train_loss=0.882, val_loss_step=0.268]#015Epoch 3:  92%|█████████▏| 6480/7036 [11:57<01:01,  9.03it/s, loss=1.02, v_num=0, val_loss_epoch=0.207, train_loss=0.579, val_loss_step=0.268] #015Epoch 3:  93%|█████████▎| 6510/7036 [12:01<00:58,  9.03it/s, loss=1.02, v_num=0, val_loss_epoch=0.207, train_loss=0.579, val_loss_step=0.268]#015Epoch 3:  93%|█████████▎| 6510/7036 [12:01<00:58,  9.03it/s, loss=0.848, v_num=0, val_loss_epoch=0.207, train_loss=1.280, val_loss_step=0.268]#015Epoch 3:  93%|█████████▎| 6540/7036 [12:04<00:54,  9.03it/s, loss=0.848, v_num=0, val_loss_epoch=0.207, trai

[34m9:54,  9.00it/s, loss=0.789, v_num=0, val_loss_epoch=0.205, train_loss=0.668, val_loss_step=0.174]#015Epoch 4:  24%|██▍       | 1680/7036 [03:06<09:54,  9.00it/s, loss=0.742, v_num=0, val_loss_epoch=0.205, train_loss=0.739, val_loss_step=0.174]#015Epoch 4:  24%|██▍       | 1710/7036 [03:09<09:51,  9.00it/s, loss=0.742, v_num=0, val_loss_epoch=0.205, train_loss=0.739, val_loss_step=0.174]#015Epoch 4:  24%|██▍       | 1710/7036 [03:09<09:51,  9.00it/s, loss=0.816, v_num=0, val_loss_epoch=0.205, train_loss=1.010, val_loss_step=0.174]#015Epoch 4:  25%|██▍       | 1740/7036 [03:13<09:48,  9.00it/s, loss=0.816, v_num=0, val_loss_epoch=0.205, train_loss=1.010, val_loss_step=0.174]#015Epoch 4:  25%|██▍       | 1740/7036 [03:13<09:48,  9.00it/s, loss=0.785, v_num=0, val_loss_epoch=0.205, train_loss=0.418, val_loss_step=0.174]#015Epoch 4:  25%|██▌       | 1770/7036 [03:16<09:44,  9.01it/s, loss=0.785, v_num=0, val_loss_epoch=0.205, train_loss=0.418, val_loss_step=0.174]#015Epoch 4:  25%|██▌

[34m�██▋     | 3300/7036 [06:06<06:55,  9.00it/s, loss=0.771, v_num=0, val_loss_epoch=0.205, train_loss=1.100, val_loss_step=0.174]#015Epoch 4:  47%|████▋     | 3330/7036 [06:10<06:51,  9.00it/s, loss=0.771, v_num=0, val_loss_epoch=0.205, train_loss=1.100, val_loss_step=0.174]#015Epoch 4:  47%|████▋     | 3330/7036 [06:10<06:51,  9.00it/s, loss=1.24, v_num=0, val_loss_epoch=0.205, train_loss=0.737, val_loss_step=0.174] #015Epoch 4:  48%|████▊     | 3360/7036 [06:13<06:48,  9.00it/s, loss=1.24, v_num=0, val_loss_epoch=0.205, train_loss=0.737, val_loss_step=0.174]#015Epoch 4:  48%|████▊     | 3360/7036 [06:13<06:48,  9.00it/s, loss=0.746, v_num=0, val_loss_epoch=0.205, train_loss=1.040, val_loss_step=0.174]#015Epoch 4:  48%|████▊     | 3390/7036 [06:16<06:45,  9.00it/s, loss=0.746, v_num=0, val_loss_epoch=0.205, train_loss=1.040, val_loss_step=0.174]#015Epoch 4:  48%|████▊     | 3390/7036 [06:16<06:45,  9.00it/s, loss=0.785, v_num=0, val_loss_epoch=0.205, train_loss=0.550, val_loss_step

[34m05, train_loss=0.507, val_loss_step=0.174]#015Epoch 4:  69%|██████▉   | 4890/7036 [09:02<03:58,  9.01it/s, loss=0.799, v_num=0, val_loss_epoch=0.205, train_loss=0.873, val_loss_step=0.174]#015Epoch 4:  70%|██████▉   | 4920/7036 [09:06<03:54,  9.01it/s, loss=0.799, v_num=0, val_loss_epoch=0.205, train_loss=0.873, val_loss_step=0.174]#015Epoch 4:  70%|██████▉   | 4920/7036 [09:06<03:54,  9.01it/s, loss=0.847, v_num=0, val_loss_epoch=0.205, train_loss=1.060, val_loss_step=0.174]#015Epoch 4:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=0.847, v_num=0, val_loss_epoch=0.205, train_loss=1.060, val_loss_step=0.174]#015Epoch 4:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=0.949, v_num=0, val_loss_epoch=0.205, train_loss=0.775, val_loss_step=0.174]#015Epoch 4:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=0.949, v_num=0, val_loss_epoch=0.205, train_loss=0.775, val_loss_step=0.174]#015Epoch 4:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=1.06, v

[34mt/s, loss=0.796, v_num=0, val_loss_epoch=0.205, train_loss=0.852, val_loss_step=0.174]#015Epoch 4:  92%|█████████▏| 6450/7036 [11:55<01:05,  9.01it/s, loss=0.796, v_num=0, val_loss_epoch=0.205, train_loss=0.852, val_loss_step=0.174]#015Epoch 4:  92%|█████████▏| 6450/7036 [11:55<01:05,  9.01it/s, loss=1.17, v_num=0, val_loss_epoch=0.205, train_loss=2.430, val_loss_step=0.174] #015Epoch 4:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.01it/s, loss=1.17, v_num=0, val_loss_epoch=0.205, train_loss=2.430, val_loss_step=0.174]#015Epoch 4:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.01it/s, loss=0.808, v_num=0, val_loss_epoch=0.205, train_loss=0.467, val_loss_step=0.174]#015Epoch 4:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.01it/s, loss=0.808, v_num=0, val_loss_epoch=0.205, train_loss=0.467, val_loss_step=0.174]#015Epoch 4:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.01it/s, loss=0.753, v_num=0, val_loss_epoch=0.205, train_loss=0.642, val_loss_step=0.174]#015Epoch 4:  93%|█████████▎| 6540

[34m9:54,  9.02it/s, loss=0.852, v_num=0, val_loss_epoch=0.205, train_loss=0.975, val_loss_step=0.176]#015Epoch 5:  24%|██▍       | 1680/7036 [03:06<09:54,  9.02it/s, loss=0.693, v_num=0, val_loss_epoch=0.205, train_loss=0.637, val_loss_step=0.176]#015Epoch 5:  24%|██▍       | 1710/7036 [03:09<09:50,  9.01it/s, loss=0.693, v_num=0, val_loss_epoch=0.205, train_loss=0.637, val_loss_step=0.176]#015Epoch 5:  24%|██▍       | 1710/7036 [03:09<09:50,  9.01it/s, loss=0.719, v_num=0, val_loss_epoch=0.205, train_loss=0.596, val_loss_step=0.176]#015Epoch 5:  25%|██▍       | 1740/7036 [03:13<09:47,  9.01it/s, loss=0.719, v_num=0, val_loss_epoch=0.205, train_loss=0.596, val_loss_step=0.176]#015Epoch 5:  25%|██▍       | 1740/7036 [03:13<09:47,  9.01it/s, loss=0.775, v_num=0, val_loss_epoch=0.205, train_loss=1.500, val_loss_step=0.176]#015Epoch 5:  25%|██▌       | 1770/7036 [03:16<09:44,  9.02it/s, loss=0.775, v_num=0, val_loss_epoch=0.205, train_loss=1.500, val_loss_step=0.176]#015Epoch 5:  25%|██▌

[34m 5:  47%|████▋     | 3300/7036 [06:05<06:53,  9.03it/s, loss=0.826, v_num=0, val_loss_epoch=0.205, train_loss=0.607, val_loss_step=0.176]#015Epoch 5:  47%|████▋     | 3330/7036 [06:08<06:50,  9.03it/s, loss=0.826, v_num=0, val_loss_epoch=0.205, train_loss=0.607, val_loss_step=0.176]#015Epoch 5:  47%|████▋     | 3330/7036 [06:08<06:50,  9.03it/s, loss=0.725, v_num=0, val_loss_epoch=0.205, train_loss=0.621, val_loss_step=0.176]#015Epoch 5:  48%|████▊     | 3360/7036 [06:12<06:47,  9.03it/s, loss=0.725, v_num=0, val_loss_epoch=0.205, train_loss=0.621, val_loss_step=0.176]#015Epoch 5:  48%|████▊     | 3360/7036 [06:12<06:47,  9.03it/s, loss=0.658, v_num=0, val_loss_epoch=0.205, train_loss=0.679, val_loss_step=0.176]#015Epoch 5:  48%|████▊     | 3390/7036 [06:15<06:43,  9.03it/s, loss=0.658, v_num=0, val_loss_epoch=0.205, train_loss=0.679, val_loss_step=0.176]#015Epoch 5:  48%|████▊     | 3390/7036 [06:15<06:43,  9.03it/s, loss=0.723, v_num=0, val_loss_epoch=0.205, train_loss=0.785, va

[34m=0, val_loss_epoch=0.205, train_loss=0.569, val_loss_step=0.176]#015Epoch 5:  69%|██████▉   | 4890/7036 [09:01<03:57,  9.03it/s, loss=0.736, v_num=0, val_loss_epoch=0.205, train_loss=1.340, val_loss_step=0.176]#015Epoch 5:  70%|██████▉   | 4920/7036 [09:04<03:54,  9.03it/s, loss=0.736, v_num=0, val_loss_epoch=0.205, train_loss=1.340, val_loss_step=0.176]#015Epoch 5:  70%|██████▉   | 4920/7036 [09:04<03:54,  9.03it/s, loss=0.856, v_num=0, val_loss_epoch=0.205, train_loss=0.496, val_loss_step=0.176]#015Epoch 5:  70%|███████   | 4950/7036 [09:08<03:51,  9.03it/s, loss=0.856, v_num=0, val_loss_epoch=0.205, train_loss=0.496, val_loss_step=0.176]#015Epoch 5:  70%|███████   | 4950/7036 [09:08<03:51,  9.03it/s, loss=0.753, v_num=0, val_loss_epoch=0.205, train_loss=0.610, val_loss_step=0.176]#015Epoch 5:  71%|███████   | 4980/7036 [09:11<03:47,  9.03it/s, loss=0.753, v_num=0, val_loss_epoch=0.205, train_loss=0.610, val_loss_step=0.176]#015Epoch 5:  71%|███████   | 4980/7036 [09:11<03:47,  

[34m�� | 6420/7036 [11:50<01:08,  9.03it/s, loss=0.641, v_num=0, val_loss_epoch=0.205, train_loss=0.646, val_loss_step=0.176]#015Epoch 5:  92%|█████████▏| 6450/7036 [11:54<01:04,  9.03it/s, loss=0.641, v_num=0, val_loss_epoch=0.205, train_loss=0.646, val_loss_step=0.176]#015Epoch 5:  92%|█████████▏| 6450/7036 [11:54<01:04,  9.03it/s, loss=0.909, v_num=0, val_loss_epoch=0.205, train_loss=0.555, val_loss_step=0.176]#015Epoch 5:  92%|█████████▏| 6480/7036 [11:57<01:01,  9.03it/s, loss=0.909, v_num=0, val_loss_epoch=0.205, train_loss=0.555, val_loss_step=0.176]#015Epoch 5:  92%|█████████▏| 6480/7036 [11:57<01:01,  9.03it/s, loss=0.873, v_num=0, val_loss_epoch=0.205, train_loss=1.160, val_loss_step=0.176]#015Epoch 5:  93%|█████████▎| 6510/7036 [12:00<00:58,  9.03it/s, loss=0.873, v_num=0, val_loss_epoch=0.205, train_loss=1.160, val_loss_step=0.176]#015Epoch 5:  93%|█████████▎| 6510/7036 [12:00<00:58,  9.03it/s, loss=0.83, v_num=0, val_loss_epoch=0.205, train_loss=0.775, val_loss_step=0.176

[34m36 [03:37<09:26,  8.97it/s, loss=1.91, v_num=1, val_loss_epoch=0.708, train_loss=1.880]#015Epoch 0:  28%|██▊       | 1950/7036 [03:37<09:26,  8.97it/s, loss=2.05, v_num=1, val_loss_epoch=0.708, train_loss=5.020]#015Epoch 0:  28%|██▊       | 1980/7036 [03:40<09:23,  8.98it/s, loss=2.05, v_num=1, val_loss_epoch=0.708, train_loss=5.020]#015Epoch 0:  28%|██▊       | 1980/7036 [03:40<09:23,  8.98it/s, loss=1.69, v_num=1, val_loss_epoch=0.708, train_loss=1.910]#015Epoch 0:  29%|██▊       | 2010/7036 [03:43<09:19,  8.98it/s, loss=1.69, v_num=1, val_loss_epoch=0.708, train_loss=1.910]#015Epoch 0:  29%|██▊       | 2010/7036 [03:43<09:19,  8.98it/s, loss=1.84, v_num=1, val_loss_epoch=0.708, train_loss=1.200]#015Epoch 0:  29%|██▉       | 2040/7036 [03:47<09:16,  8.98it/s, loss=1.84, v_num=1, val_loss_epoch=0.708, train_loss=1.200]#015Epoch 0:  29%|██▉       | 2040/7036 [03:47<09:16,  8.98it/s, loss=1.84, v_num=1, val_loss_epoch=0.708, train_loss=1.240]#015Epoch 0:  29%|██▉       | 2070/7036 

[34m5:54,  9.01it/s, loss=1.69, v_num=1, val_loss_epoch=0.708, train_loss=1.520]#015Epoch 0:  55%|█████▍    | 3840/7036 [07:06<05:54,  9.01it/s, loss=1.86, v_num=1, val_loss_epoch=0.708, train_loss=2.180]#015Epoch 0:  55%|█████▌    | 3870/7036 [07:09<05:51,  9.01it/s, loss=1.86, v_num=1, val_loss_epoch=0.708, train_loss=2.180]#015Epoch 0:  55%|█████▌    | 3870/7036 [07:09<05:51,  9.01it/s, loss=2.29, v_num=1, val_loss_epoch=0.708, train_loss=1.770]#015Epoch 0:  55%|█████▌    | 3900/7036 [07:12<05:48,  9.01it/s, loss=2.29, v_num=1, val_loss_epoch=0.708, train_loss=1.770]#015Epoch 0:  55%|█████▌    | 3900/7036 [07:12<05:48,  9.01it/s, loss=2.07, v_num=1, val_loss_epoch=0.708, train_loss=1.580]#015Epoch 0:  56%|█████▌    | 3930/7036 [07:16<05:44,  9.01it/s, loss=2.07, v_num=1, val_loss_epoch=0.708, train_loss=1.580]#015Epoch 0:  56%|█████▌    | 3930/7036 [07:16<05:44,  9.01it/s, loss=1.57, v_num=1, val_loss_epoch=0.708, train_loss=1.810]#015Epoch 0:  56%|█████▋    | 3960/7036 [07:19<05:4

[34m1, val_loss_epoch=0.708, train_loss=2.680]#015Epoch 0:  81%|████████  | 5670/7036 [10:28<02:31,  9.02it/s, loss=1.78, v_num=1, val_loss_epoch=0.708, train_loss=2.680]#015Epoch 0:  81%|████████  | 5670/7036 [10:28<02:31,  9.02it/s, loss=2.19, v_num=1, val_loss_epoch=0.708, train_loss=2.360]#015Epoch 0:  81%|████████  | 5700/7036 [10:32<02:28,  9.01it/s, loss=2.19, v_num=1, val_loss_epoch=0.708, train_loss=2.360]#015Epoch 0:  81%|████████  | 5700/7036 [10:32<02:28,  9.01it/s, loss=2.38, v_num=1, val_loss_epoch=0.708, train_loss=1.640]#015Epoch 0:  81%|████████▏ | 5730/7036 [10:35<02:24,  9.02it/s, loss=2.38, v_num=1, val_loss_epoch=0.708, train_loss=1.640]#015Epoch 0:  81%|████████▏ | 5730/7036 [10:35<02:24,  9.02it/s, loss=1.7, v_num=1, val_loss_epoch=0.708, train_loss=1.320] #015Epoch 0:  82%|████████▏ | 5760/7036 [10:38<02:21,  9.01it/s, loss=1.7, v_num=1, val_loss_epoch=0.708, train_loss=1.320]#015Epoch 0:  82%|████████▏ | 5760/7036 [10:38<02:21,  9.01it/s, loss=1.55, v_num=1, v

[34m#015Validating:   8%|▊         | 30/383 [00:02<00:33, 10.48it/s]#033[A#015Epoch 0:  95%|█████████▌| 6690/7036 [12:21<00:38,  9.03it/s, loss=1.95, v_num=1, val_loss_epoch=0.708, train_loss=5.610][0m
[34m#015Validating:  16%|█▌        | 60/383 [00:05<00:30, 10.46it/s]#033[A#015Epoch 0:  96%|█████████▌| 6720/7036 [12:24<00:34,  9.03it/s, loss=1.95, v_num=1, val_loss_epoch=0.708, train_loss=5.610][0m
[34m#015Validating:  23%|██▎       | 90/383 [00:08<00:28, 10.42it/s]#033[A#015Epoch 0:  96%|█████████▌| 6750/7036 [12:26<00:31,  9.04it/s, loss=1.95, v_num=1, val_loss_epoch=0.708, train_loss=5.610][0m
[34m#015Validating:  31%|███▏      | 120/383 [00:11<00:25, 10.52it/s]#033[A#015Epoch 0:  96%|█████████▋| 6780/7036 [12:29<00:28,  9.04it/s, loss=1.95, v_num=1, val_loss_epoch=0.708, train_loss=5.610][0m
[34m#015Validating:  39%|███▉      | 150/383 [00:14<00:22, 10.48it/s]#033[A#015Epoch 0:  97%|█████████▋| 6810/7036 [12:32<00:24,  9.05it/s, loss=1.95, v_num=1, val_loss_epoch=0.708, 

[34mh 1:  24%|██▍       | 1680/7036 [03:06<09:54,  9.00it/s, loss=2.09, v_num=1, val_loss_epoch=0.327, train_loss=1.730, val_loss_step=0.421]#015Epoch 1:  24%|██▍       | 1710/7036 [03:09<09:51,  9.00it/s, loss=2.09, v_num=1, val_loss_epoch=0.327, train_loss=1.730, val_loss_step=0.421]#015Epoch 1:  24%|██▍       | 1710/7036 [03:09<09:51,  9.00it/s, loss=2.11, v_num=1, val_loss_epoch=0.327, train_loss=2.590, val_loss_step=0.421]#015Epoch 1:  25%|██▍       | 1740/7036 [03:13<09:48,  9.00it/s, loss=2.11, v_num=1, val_loss_epoch=0.327, train_loss=2.590, val_loss_step=0.421]#015Epoch 1:  25%|██▍       | 1740/7036 [03:13<09:48,  9.00it/s, loss=1.73, v_num=1, val_loss_epoch=0.327, train_loss=1.560, val_loss_step=0.421]#015Epoch 1:  25%|██▌       | 1770/7036 [03:16<09:44,  9.00it/s, loss=1.73, v_num=1, val_loss_epoch=0.327, train_loss=1.560, val_loss_step=0.421]#015Epoch 1:  25%|██▌       | 1770/7036 [03:16<09:44,  9.00it/s, loss=1.86, v_num=1, val_loss_epoch=0.327, train_loss=1.390, val_loss

[34m9.00it/s, loss=2.25, v_num=1, val_loss_epoch=0.327, train_loss=1.690, val_loss_step=0.421]#015Epoch 1:  47%|████▋     | 3330/7036 [06:09<06:51,  9.00it/s, loss=1.76, v_num=1, val_loss_epoch=0.327, train_loss=1.380, val_loss_step=0.421]#015Epoch 1:  48%|████▊     | 3360/7036 [06:13<06:48,  9.00it/s, loss=1.76, v_num=1, val_loss_epoch=0.327, train_loss=1.380, val_loss_step=0.421]#015Epoch 1:  48%|████▊     | 3360/7036 [06:13<06:48,  9.00it/s, loss=1.9, v_num=1, val_loss_epoch=0.327, train_loss=5.880, val_loss_step=0.421] #015Epoch 1:  48%|████▊     | 3390/7036 [06:16<06:45,  9.00it/s, loss=1.9, v_num=1, val_loss_epoch=0.327, train_loss=5.880, val_loss_step=0.421]#015Epoch 1:  48%|████▊     | 3390/7036 [06:16<06:45,  9.00it/s, loss=2.22, v_num=1, val_loss_epoch=0.327, train_loss=1.470, val_loss_step=0.421]#015Epoch 1:  49%|████▊     | 3420/7036 [06:19<06:41,  9.00it/s, loss=2.22, v_num=1, val_loss_epoch=0.327, train_loss=1.470, val_loss_step=0.421]#015Epoch 1:  49%|████▊     | 3420/7

[34mv_num=1, val_loss_epoch=0.327, train_loss=1.800, val_loss_step=0.421]#015Epoch 1:  70%|██████▉   | 4920/7036 [09:06<03:54,  9.01it/s, loss=1.75, v_num=1, val_loss_epoch=0.327, train_loss=1.340, val_loss_step=0.421]#015Epoch 1:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=1.75, v_num=1, val_loss_epoch=0.327, train_loss=1.340, val_loss_step=0.421]#015Epoch 1:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=1.58, v_num=1, val_loss_epoch=0.327, train_loss=1.970, val_loss_step=0.421]#015Epoch 1:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=1.58, v_num=1, val_loss_epoch=0.327, train_loss=1.970, val_loss_step=0.421]#015Epoch 1:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=3, v_num=1, val_loss_epoch=0.327, train_loss=5.330, val_loss_step=0.421]   #015Epoch 1:  71%|███████   | 5010/7036 [09:16<03:44,  9.01it/s, loss=3, v_num=1, val_loss_epoch=0.327, train_loss=5.330, val_loss_step=0.421]#015Epoch 1:  71%|███████   | 5010/7036 [09:16<03:44,  9.01

[34m27, train_loss=1.330, val_loss_step=0.421]#015Epoch 1:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.01it/s, loss=1.91, v_num=1, val_loss_epoch=0.327, train_loss=1.330, val_loss_step=0.421]#015Epoch 1:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.01it/s, loss=1.95, v_num=1, val_loss_epoch=0.327, train_loss=5.560, val_loss_step=0.421]#015Epoch 1:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.01it/s, loss=1.95, v_num=1, val_loss_epoch=0.327, train_loss=5.560, val_loss_step=0.421]#015Epoch 1:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.01it/s, loss=1.6, v_num=1, val_loss_epoch=0.327, train_loss=2.140, val_loss_step=0.421] #015Epoch 1:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.01it/s, loss=1.6, v_num=1, val_loss_epoch=0.327, train_loss=2.140, val_loss_step=0.421]#015Epoch 1:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.01it/s, loss=1.62, v_num=1, val_loss_epoch=0.327, train_loss=1.220, val_loss_step=0.421]#015Epoch 1:  93%|█████████▎| 6570/7036 [12:09<00:51,  9.01it/s, loss=1.62, v_num=1,

[34m4%|██▍       | 1680/7036 [03:06<09:54,  9.01it/s, loss=1.54, v_num=1, val_loss_epoch=0.327, train_loss=1.220, val_loss_step=0.413]#015Epoch 2:  24%|██▍       | 1710/7036 [03:09<09:50,  9.01it/s, loss=1.54, v_num=1, val_loss_epoch=0.327, train_loss=1.220, val_loss_step=0.413]#015Epoch 2:  24%|██▍       | 1710/7036 [03:09<09:50,  9.01it/s, loss=2.01, v_num=1, val_loss_epoch=0.327, train_loss=0.887, val_loss_step=0.413]#015Epoch 2:  25%|██▍       | 1740/7036 [03:12<09:47,  9.02it/s, loss=2.01, v_num=1, val_loss_epoch=0.327, train_loss=0.887, val_loss_step=0.413]#015Epoch 2:  25%|██▍       | 1740/7036 [03:12<09:47,  9.02it/s, loss=2.45, v_num=1, val_loss_epoch=0.327, train_loss=1.260, val_loss_step=0.413]#015Epoch 2:  25%|██▌       | 1770/7036 [03:16<09:44,  9.02it/s, loss=2.45, v_num=1, val_loss_epoch=0.327, train_loss=1.260, val_loss_step=0.413]#015Epoch 2:  25%|██▌       | 1770/7036 [03:16<09:44,  9.02it/s, loss=1.43, v_num=1, val_loss_epoch=0.327, train_loss=1.310, val_loss_step=0

[34m/s, loss=2.36, v_num=1, val_loss_epoch=0.327, train_loss=2.430, val_loss_step=0.413]#015Epoch 2:  47%|████▋     | 3330/7036 [06:09<06:50,  9.02it/s, loss=1.97, v_num=1, val_loss_epoch=0.327, train_loss=1.120, val_loss_step=0.413]#015Epoch 2:  48%|████▊     | 3360/7036 [06:12<06:47,  9.02it/s, loss=1.97, v_num=1, val_loss_epoch=0.327, train_loss=1.120, val_loss_step=0.413]#015Epoch 2:  48%|████▊     | 3360/7036 [06:12<06:47,  9.02it/s, loss=1.98, v_num=1, val_loss_epoch=0.327, train_loss=5.430, val_loss_step=0.413]#015Epoch 2:  48%|████▊     | 3390/7036 [06:15<06:44,  9.02it/s, loss=1.98, v_num=1, val_loss_epoch=0.327, train_loss=5.430, val_loss_step=0.413]#015Epoch 2:  48%|████▊     | 3390/7036 [06:15<06:44,  9.02it/s, loss=1.99, v_num=1, val_loss_epoch=0.327, train_loss=1.420, val_loss_step=0.413]#015Epoch 2:  49%|████▊     | 3420/7036 [06:19<06:40,  9.02it/s, loss=1.99, v_num=1, val_loss_epoch=0.327, train_loss=1.420, val_loss_step=0.413]#015Epoch 2:  49%|████▊     | 3420/7036 [

[34m1, val_loss_epoch=0.327, train_loss=1.420, val_loss_step=0.413]#015Epoch 2:  70%|██████▉   | 4920/7036 [09:05<03:54,  9.02it/s, loss=1.68, v_num=1, val_loss_epoch=0.327, train_loss=1.160, val_loss_step=0.413]#015Epoch 2:  70%|███████   | 4950/7036 [09:08<03:51,  9.02it/s, loss=1.68, v_num=1, val_loss_epoch=0.327, train_loss=1.160, val_loss_step=0.413]#015Epoch 2:  70%|███████   | 4950/7036 [09:08<03:51,  9.02it/s, loss=2.22, v_num=1, val_loss_epoch=0.327, train_loss=1.270, val_loss_step=0.413]#015Epoch 2:  71%|███████   | 4980/7036 [09:12<03:47,  9.02it/s, loss=2.22, v_num=1, val_loss_epoch=0.327, train_loss=1.270, val_loss_step=0.413]#015Epoch 2:  71%|███████   | 4980/7036 [09:12<03:47,  9.02it/s, loss=1.74, v_num=1, val_loss_epoch=0.327, train_loss=5.480, val_loss_step=0.413]#015Epoch 2:  71%|███████   | 5010/7036 [09:15<03:44,  9.02it/s, loss=1.74, v_num=1, val_loss_epoch=0.327, train_loss=5.480, val_loss_step=0.413]#015Epoch 2:  71%|███████   | 5010/7036 [09:15<03:44,  9.02it/

[34m train_loss=1.400, val_loss_step=0.413]#015Epoch 2:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.02it/s, loss=2.28, v_num=1, val_loss_epoch=0.327, train_loss=1.400, val_loss_step=0.413]#015Epoch 2:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.02it/s, loss=2.13, v_num=1, val_loss_epoch=0.327, train_loss=1.640, val_loss_step=0.413]#015Epoch 2:  93%|█████████▎| 6510/7036 [12:01<00:58,  9.02it/s, loss=2.13, v_num=1, val_loss_epoch=0.327, train_loss=1.640, val_loss_step=0.413]#015Epoch 2:  93%|█████████▎| 6510/7036 [12:01<00:58,  9.02it/s, loss=1.73, v_num=1, val_loss_epoch=0.327, train_loss=1.840, val_loss_step=0.413]#015Epoch 2:  93%|█████████▎| 6540/7036 [12:04<00:54,  9.02it/s, loss=1.73, v_num=1, val_loss_epoch=0.327, train_loss=1.840, val_loss_step=0.413]#015Epoch 2:  93%|█████████▎| 6540/7036 [12:04<00:54,  9.02it/s, loss=1.57, v_num=1, val_loss_epoch=0.327, train_loss=1.800, val_loss_step=0.413]#015Epoch 2:  93%|█████████▎| 6570/7036 [12:08<00:51,  9.02it/s, loss=1.57, v_num=1, v

[34m██▍       | 1680/7036 [03:06<09:54,  9.01it/s, loss=1.88, v_num=1, val_loss_epoch=0.327, train_loss=1.570, val_loss_step=0.416]#015Epoch 3:  24%|██▍       | 1710/7036 [03:09<09:51,  9.00it/s, loss=1.88, v_num=1, val_loss_epoch=0.327, train_loss=1.570, val_loss_step=0.416]#015Epoch 3:  24%|██▍       | 1710/7036 [03:09<09:51,  9.00it/s, loss=1.94, v_num=1, val_loss_epoch=0.327, train_loss=1.120, val_loss_step=0.416]#015Epoch 3:  25%|██▍       | 1740/7036 [03:13<09:47,  9.01it/s, loss=1.94, v_num=1, val_loss_epoch=0.327, train_loss=1.120, val_loss_step=0.416]#015Epoch 3:  25%|██▍       | 1740/7036 [03:13<09:47,  9.01it/s, loss=2.27, v_num=1, val_loss_epoch=0.327, train_loss=1.670, val_loss_step=0.416]#015Epoch 3:  25%|██▌       | 1770/7036 [03:16<09:44,  9.00it/s, loss=2.27, v_num=1, val_loss_epoch=0.327, train_loss=1.670, val_loss_step=0.416]#015Epoch 3:  25%|██▌       | 1770/7036 [03:16<09:44,  9.00it/s, loss=2.21, v_num=1, val_loss_epoch=0.327, train_loss=1.880, val_loss_step=0.41

[34m/s, loss=1.86, v_num=1, val_loss_epoch=0.327, train_loss=6.020, val_loss_step=0.416]#015Epoch 3:  47%|████▋     | 3330/7036 [06:09<06:51,  9.01it/s, loss=1.9, v_num=1, val_loss_epoch=0.327, train_loss=1.880, val_loss_step=0.416] #015Epoch 3:  48%|████▊     | 3360/7036 [06:12<06:48,  9.01it/s, loss=1.9, v_num=1, val_loss_epoch=0.327, train_loss=1.880, val_loss_step=0.416]#015Epoch 3:  48%|████▊     | 3360/7036 [06:12<06:48,  9.01it/s, loss=1.58, v_num=1, val_loss_epoch=0.327, train_loss=1.370, val_loss_step=0.416]#015Epoch 3:  48%|████▊     | 3390/7036 [06:16<06:44,  9.01it/s, loss=1.58, v_num=1, val_loss_epoch=0.327, train_loss=1.370, val_loss_step=0.416]#015Epoch 3:  48%|████▊     | 3390/7036 [06:16<06:44,  9.01it/s, loss=1.69, v_num=1, val_loss_epoch=0.327, train_loss=1.980, val_loss_step=0.416]#015Epoch 3:  49%|████▊     | 3420/7036 [06:19<06:41,  9.01it/s, loss=1.69, v_num=1, val_loss_epoch=0.327, train_loss=1.980, val_loss_step=0.416]#015Epoch 3:  49%|████▊     | 3420/7036 [0

[34m_loss_epoch=0.327, train_loss=1.120, val_loss_step=0.416]#015Epoch 3:  70%|██████▉   | 4920/7036 [09:05<03:54,  9.01it/s, loss=1.73, v_num=1, val_loss_epoch=0.327, train_loss=1.340, val_loss_step=0.416]#015Epoch 3:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=1.73, v_num=1, val_loss_epoch=0.327, train_loss=1.340, val_loss_step=0.416]#015Epoch 3:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=2, v_num=1, val_loss_epoch=0.327, train_loss=2.100, val_loss_step=0.416]   #015Epoch 3:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=2, v_num=1, val_loss_epoch=0.327, train_loss=2.100, val_loss_step=0.416]#015Epoch 3:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=1.64, v_num=1, val_loss_epoch=0.327, train_loss=1.170, val_loss_step=0.416]#015Epoch 3:  71%|███████   | 5010/7036 [09:15<03:44,  9.01it/s, loss=1.64, v_num=1, val_loss_epoch=0.327, train_loss=1.170, val_loss_step=0.416]#015Epoch 3:  71%|███████   | 5010/7036 [09:15<03:44,  9.01it/s, loss=1

[34m.980, val_loss_step=0.416]#015Epoch 3:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.02it/s, loss=2.34, v_num=1, val_loss_epoch=0.327, train_loss=4.980, val_loss_step=0.416]#015Epoch 3:  92%|█████████▏| 6480/7036 [11:58<01:01,  9.02it/s, loss=1.82, v_num=1, val_loss_epoch=0.327, train_loss=1.720, val_loss_step=0.416]#015Epoch 3:  93%|█████████▎| 6510/7036 [12:01<00:58,  9.02it/s, loss=1.82, v_num=1, val_loss_epoch=0.327, train_loss=1.720, val_loss_step=0.416]#015Epoch 3:  93%|█████████▎| 6510/7036 [12:01<00:58,  9.02it/s, loss=1.9, v_num=1, val_loss_epoch=0.327, train_loss=2.180, val_loss_step=0.416] #015Epoch 3:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.02it/s, loss=1.9, v_num=1, val_loss_epoch=0.327, train_loss=2.180, val_loss_step=0.416]#015Epoch 3:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.02it/s, loss=1.52, v_num=1, val_loss_epoch=0.327, train_loss=1.700, val_loss_step=0.416]#015Epoch 3:  93%|█████████▎| 6570/7036 [12:08<00:51,  9.02it/s, loss=1.52, v_num=1, val_loss_epoch=

[34m 24%|██▍       | 1680/7036 [03:06<09:53,  9.02it/s, loss=2.3, v_num=1, val_loss_epoch=0.327, train_loss=1.100, val_loss_step=0.421] #015Epoch 4:  24%|██▍       | 1710/7036 [03:09<09:50,  9.02it/s, loss=2.3, v_num=1, val_loss_epoch=0.327, train_loss=1.100, val_loss_step=0.421]#015Epoch 4:  24%|██▍       | 1710/7036 [03:09<09:50,  9.02it/s, loss=1.63, v_num=1, val_loss_epoch=0.327, train_loss=1.480, val_loss_step=0.421]#015Epoch 4:  25%|██▍       | 1740/7036 [03:12<09:47,  9.02it/s, loss=1.63, v_num=1, val_loss_epoch=0.327, train_loss=1.480, val_loss_step=0.421]#015Epoch 4:  25%|██▍       | 1740/7036 [03:12<09:47,  9.02it/s, loss=2.08, v_num=1, val_loss_epoch=0.327, train_loss=1.250, val_loss_step=0.421]#015Epoch 4:  25%|██▌       | 1770/7036 [03:16<09:43,  9.02it/s, loss=2.08, v_num=1, val_loss_epoch=0.327, train_loss=1.250, val_loss_step=0.421]#015Epoch 4:  25%|██▌       | 1770/7036 [03:16<09:43,  9.02it/s, loss=2.01, v_num=1, val_loss_epoch=0.327, train_loss=0.907, val_loss_step=

[34m loss=1.71, v_num=1, val_loss_epoch=0.327, train_loss=2.060, val_loss_step=0.421]#015Epoch 4:  47%|████▋     | 3330/7036 [06:08<06:50,  9.03it/s, loss=2.39, v_num=1, val_loss_epoch=0.327, train_loss=5.170, val_loss_step=0.421]#015Epoch 4:  48%|████▊     | 3360/7036 [06:12<06:47,  9.03it/s, loss=2.39, v_num=1, val_loss_epoch=0.327, train_loss=5.170, val_loss_step=0.421]#015Epoch 4:  48%|████▊     | 3360/7036 [06:12<06:47,  9.03it/s, loss=1.51, v_num=1, val_loss_epoch=0.327, train_loss=2.340, val_loss_step=0.421]#015Epoch 4:  48%|████▊     | 3390/7036 [06:15<06:44,  9.02it/s, loss=1.51, v_num=1, val_loss_epoch=0.327, train_loss=2.340, val_loss_step=0.421]#015Epoch 4:  48%|████▊     | 3390/7036 [06:15<06:44,  9.02it/s, loss=1.93, v_num=1, val_loss_epoch=0.327, train_loss=1.390, val_loss_step=0.421]#015Epoch 4:  49%|████▊     | 3420/7036 [06:18<06:40,  9.03it/s, loss=1.93, v_num=1, val_loss_epoch=0.327, train_loss=1.390, val_loss_step=0.421]#015Epoch 4:  49%|████▊     | 3420/7036 [06:

[34mval_loss_epoch=0.327, train_loss=1.410, val_loss_step=0.421]#015Epoch 4:  70%|██████▉   | 4920/7036 [09:05<03:54,  9.03it/s, loss=1.56, v_num=1, val_loss_epoch=0.327, train_loss=2.310, val_loss_step=0.421]#015Epoch 4:  70%|███████   | 4950/7036 [09:08<03:51,  9.02it/s, loss=1.56, v_num=1, val_loss_epoch=0.327, train_loss=2.310, val_loss_step=0.421]#015Epoch 4:  70%|███████   | 4950/7036 [09:08<03:51,  9.02it/s, loss=1.71, v_num=1, val_loss_epoch=0.327, train_loss=1.260, val_loss_step=0.421]#015Epoch 4:  71%|███████   | 4980/7036 [09:11<03:47,  9.03it/s, loss=1.71, v_num=1, val_loss_epoch=0.327, train_loss=1.260, val_loss_step=0.421]#015Epoch 4:  71%|███████   | 4980/7036 [09:11<03:47,  9.03it/s, loss=1.97, v_num=1, val_loss_epoch=0.327, train_loss=1.660, val_loss_step=0.421]#015Epoch 4:  71%|███████   | 5010/7036 [09:15<03:44,  9.02it/s, loss=1.97, v_num=1, val_loss_epoch=0.327, train_loss=1.660, val_loss_step=0.421]#015Epoch 4:  71%|███████   | 5010/7036 [09:15<03:44,  9.02it/s, 

[34m_loss=1.590, val_loss_step=0.421]#015Epoch 4:  92%|█████████▏| 6480/7036 [11:57<01:01,  9.03it/s, loss=1.76, v_num=1, val_loss_epoch=0.327, train_loss=1.590, val_loss_step=0.421]#015Epoch 4:  92%|█████████▏| 6480/7036 [11:57<01:01,  9.03it/s, loss=1.72, v_num=1, val_loss_epoch=0.327, train_loss=1.090, val_loss_step=0.421]#015Epoch 4:  93%|█████████▎| 6510/7036 [12:01<00:58,  9.03it/s, loss=1.72, v_num=1, val_loss_epoch=0.327, train_loss=1.090, val_loss_step=0.421]#015Epoch 4:  93%|█████████▎| 6510/7036 [12:01<00:58,  9.03it/s, loss=1.8, v_num=1, val_loss_epoch=0.327, train_loss=1.650, val_loss_step=0.421] #015Epoch 4:  93%|█████████▎| 6540/7036 [12:04<00:54,  9.02it/s, loss=1.8, v_num=1, val_loss_epoch=0.327, train_loss=1.650, val_loss_step=0.421]#015Epoch 4:  93%|█████████▎| 6540/7036 [12:04<00:54,  9.02it/s, loss=1.61, v_num=1, val_loss_epoch=0.327, train_loss=1.470, val_loss_step=0.421]#015Epoch 4:  93%|█████████▎| 6570/7036 [12:07<00:51,  9.03it/s, loss=1.61, v_num=1, val_loss

[34m  24%|██▍       | 1680/7036 [03:06<09:56,  8.99it/s, loss=1.65, v_num=1, val_loss_epoch=0.326, train_loss=1.450, val_loss_step=0.412]#015Epoch 5:  24%|██▍       | 1710/7036 [03:10<09:52,  8.98it/s, loss=1.65, v_num=1, val_loss_epoch=0.326, train_loss=1.450, val_loss_step=0.412]#015Epoch 5:  24%|██▍       | 1710/7036 [03:10<09:52,  8.98it/s, loss=2.17, v_num=1, val_loss_epoch=0.326, train_loss=1.650, val_loss_step=0.412]#015Epoch 5:  25%|██▍       | 1740/7036 [03:13<09:49,  8.98it/s, loss=2.17, v_num=1, val_loss_epoch=0.326, train_loss=1.650, val_loss_step=0.412]#015Epoch 5:  25%|██▍       | 1740/7036 [03:13<09:49,  8.98it/s, loss=1.67, v_num=1, val_loss_epoch=0.326, train_loss=1.460, val_loss_step=0.412]#015Epoch 5:  25%|██▌       | 1770/7036 [03:16<09:46,  8.99it/s, loss=1.67, v_num=1, val_loss_epoch=0.326, train_loss=1.460, val_loss_step=0.412]#015Epoch 5:  25%|██▌       | 1770/7036 [03:16<09:46,  8.99it/s, loss=1.96, v_num=1, val_loss_epoch=0.326, train_loss=1.960, val_loss_ste

[34m  9.00it/s, loss=1.59, v_num=1, val_loss_epoch=0.326, train_loss=2.210, val_loss_step=0.412]#015Epoch 5:  47%|████▋     | 3330/7036 [06:09<06:51,  9.00it/s, loss=1.66, v_num=1, val_loss_epoch=0.326, train_loss=5.450, val_loss_step=0.412]#015Epoch 5:  48%|████▊     | 3360/7036 [06:13<06:48,  9.00it/s, loss=1.66, v_num=1, val_loss_epoch=0.326, train_loss=5.450, val_loss_step=0.412]#015Epoch 5:  48%|████▊     | 3360/7036 [06:13<06:48,  9.00it/s, loss=1.91, v_num=1, val_loss_epoch=0.326, train_loss=2.010, val_loss_step=0.412]#015Epoch 5:  48%|████▊     | 3390/7036 [06:16<06:45,  9.00it/s, loss=1.91, v_num=1, val_loss_epoch=0.326, train_loss=2.010, val_loss_step=0.412]#015Epoch 5:  48%|████▊     | 3390/7036 [06:16<06:45,  9.00it/s, loss=2.06, v_num=1, val_loss_epoch=0.326, train_loss=2.680, val_loss_step=0.412]#015Epoch 5:  49%|████▊     | 3420/7036 [06:19<06:41,  9.00it/s, loss=2.06, v_num=1, val_loss_epoch=0.326, train_loss=2.680, val_loss_step=0.412]#015Epoch 5:  49%|████▊     | 342

[34m, v_num=1, val_loss_epoch=0.326, train_loss=1.210, val_loss_step=0.412]#015Epoch 5:  70%|██████▉   | 4920/7036 [09:06<03:54,  9.01it/s, loss=1.47, v_num=1, val_loss_epoch=0.326, train_loss=1.070, val_loss_step=0.412]#015Epoch 5:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=1.47, v_num=1, val_loss_epoch=0.326, train_loss=1.070, val_loss_step=0.412]#015Epoch 5:  70%|███████   | 4950/7036 [09:09<03:51,  9.01it/s, loss=1.96, v_num=1, val_loss_epoch=0.326, train_loss=1.250, val_loss_step=0.412]#015Epoch 5:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=1.96, v_num=1, val_loss_epoch=0.326, train_loss=1.250, val_loss_step=0.412]#015Epoch 5:  71%|███████   | 4980/7036 [09:12<03:48,  9.01it/s, loss=2.64, v_num=1, val_loss_epoch=0.326, train_loss=1.570, val_loss_step=0.412]#015Epoch 5:  71%|███████   | 5010/7036 [09:16<03:44,  9.01it/s, loss=2.64, v_num=1, val_loss_epoch=0.326, train_loss=1.570, val_loss_step=0.412]#015Epoch 5:  71%|███████   | 5010/7036 [09:16<03:44, 

[34m0.326, train_loss=1.300, val_loss_step=0.412]#015Epoch 5:  92%|█████████▏| 6480/7036 [11:59<01:01,  9.01it/s, loss=1.78, v_num=1, val_loss_epoch=0.326, train_loss=1.300, val_loss_step=0.412]#015Epoch 5:  92%|█████████▏| 6480/7036 [11:59<01:01,  9.01it/s, loss=1.79, v_num=1, val_loss_epoch=0.326, train_loss=1.570, val_loss_step=0.412]#015Epoch 5:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.01it/s, loss=1.79, v_num=1, val_loss_epoch=0.326, train_loss=1.570, val_loss_step=0.412]#015Epoch 5:  93%|█████████▎| 6510/7036 [12:02<00:58,  9.01it/s, loss=2.44, v_num=1, val_loss_epoch=0.326, train_loss=2.220, val_loss_step=0.412]#015Epoch 5:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.01it/s, loss=2.44, v_num=1, val_loss_epoch=0.326, train_loss=2.220, val_loss_step=0.412]#015Epoch 5:  93%|█████████▎| 6540/7036 [12:05<00:55,  9.01it/s, loss=2.15, v_num=1, val_loss_epoch=0.326, train_loss=1.320, val_loss_step=0.412]#015Epoch 5:  93%|█████████▎| 6570/7036 [12:08<00:51,  9.01it/s, loss=2.15, v_nu

[34m#015                                                           #033[A#015Epoch 0:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.37, v_num=0, val_loss_epoch=0.209, train_loss=0.993, val_loss_step=0.164]           #015Epoch 1:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.37, v_num=0, val_loss_epoch=0.209, train_loss=0.993, val_loss_step=0.164]#015Epoch 1:   2%|▏         | 30/1614 [00:03<03:14,  8.15it/s, loss=1.37, v_num=0, val_loss_epoch=0.209, train_loss=0.993, val_loss_step=0.164]#015Epoch 1:   2%|▏         | 30/1614 [00:03<03:14,  8.15it/s, loss=1.33, v_num=0, val_loss_epoch=0.209, train_loss=0.667, val_loss_step=0.164]#015Epoch 1:   4%|▎         | 60/1614 [00:06<02:59,  8.64it/s, loss=1.33, v_num=0, val_loss_epoch=0.209, train_loss=0.667, val_loss_step=0.164]#015Epoch 1:   4%|▎         | 60/1614 [00:06<02:59,  8.63it/s, loss=1.32, v_num=0, val_loss_epoch=0.209, train_loss=1.780, val_loss_step=0.164]#015Epoch 1:   6%|▌         | 90/1614 [00:10<02:54,  8.73it/s, loss=1.32, v

[34m#015                                                           #033[A#015Epoch 1:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.35, v_num=0, val_loss_epoch=0.197, train_loss=1.510, val_loss_step=0.260]           #015Epoch 2:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.35, v_num=0, val_loss_epoch=0.197, train_loss=1.510, val_loss_step=0.260]#015Epoch 2:   2%|▏         | 30/1614 [00:03<03:06,  8.48it/s, loss=1.35, v_num=0, val_loss_epoch=0.197, train_loss=1.510, val_loss_step=0.260]#015Epoch 2:   2%|▏         | 30/1614 [00:03<03:06,  8.48it/s, loss=1.26, v_num=0, val_loss_epoch=0.197, train_loss=0.387, val_loss_step=0.260]#015Epoch 2:   4%|▎         | 60/1614 [00:06<02:59,  8.68it/s, loss=1.26, v_num=0, val_loss_epoch=0.197, train_loss=0.387, val_loss_step=0.260]#015Epoch 2:   4%|▎         | 60/1614 [00:06<02:59,  8.68it/s, loss=1.12, v_num=0, val_loss_epoch=0.197, train_loss=0.328, val_loss_step=0.260]#015Epoch 2:   6%|▌         | 90/1614 [00:10<02:52,  8.85it/s, loss=1.12, v

[34m#015Validating:  35%|███▍      | 30/86 [00:02<00:05, 10.61it/s]#033[A#015Epoch 2:  97%|█████████▋| 1560/1614 [02:52<00:05,  9.06it/s, loss=1.31, v_num=0, val_loss_epoch=0.197, train_loss=1.430, val_loss_step=0.260][0m
[34m#015Validating:  70%|██████▉   | 60/86 [00:05<00:02, 10.42it/s]#033[A#015Epoch 2:  99%|█████████▊| 1590/1614 [02:55<00:02,  9.07it/s, loss=1.31, v_num=0, val_loss_epoch=0.197, train_loss=1.430, val_loss_step=0.260][0m
[34m#015Validating: 100%|██████████| 86/86 [00:08<00:00, 10.46it/s]#033[A#015Epoch 2: 100%|██████████| 1614/1614 [02:57<00:00,  9.08it/s, loss=1.48, v_num=0, val_loss_epoch=0.182, train_loss=1.300, val_loss_step=0.158][0m
[34m#015                                                           #033[A#015Epoch 2:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.48, v_num=0, val_loss_epoch=0.182, train_loss=1.300, val_loss_step=0.158]           #015Epoch 3:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.48, v_num=0, val_loss_epoch=0.182, train_loss=1.

[34m#015                                                           #033[A#015Epoch 3:   0%|          | 0/1614 [00:00<?, ?it/s, loss=0.812, v_num=0, val_loss_epoch=0.185, train_loss=0.675, val_loss_step=0.161]           #015Epoch 4:   0%|          | 0/1614 [00:00<?, ?it/s, loss=0.812, v_num=0, val_loss_epoch=0.185, train_loss=0.675, val_loss_step=0.161]#015Epoch 4:   2%|▏         | 30/1614 [00:03<03:06,  8.48it/s, loss=0.812, v_num=0, val_loss_epoch=0.185, train_loss=0.675, val_loss_step=0.161]#015Epoch 4:   2%|▏         | 30/1614 [00:03<03:06,  8.48it/s, loss=0.903, v_num=0, val_loss_epoch=0.185, train_loss=0.419, val_loss_step=0.161]#015Epoch 4:   4%|▎         | 60/1614 [00:06<02:59,  8.68it/s, loss=0.903, v_num=0, val_loss_epoch=0.185, train_loss=0.419, val_loss_step=0.161]#015Epoch 4:   4%|▎         | 60/1614 [00:06<02:59,  8.68it/s, loss=0.745, v_num=0, val_loss_epoch=0.185, train_loss=0.676, val_loss_step=0.161]#015Epoch 4:   6%|▌         | 90/1614 [00:10<02:53,  8.77it/s, loss=0

[34m#015Validating:  35%|███▍      | 30/86 [00:02<00:05, 10.74it/s]#033[A#015Epoch 4:  97%|█████████▋| 1560/1614 [02:52<00:05,  9.05it/s, loss=0.777, v_num=0, val_loss_epoch=0.185, train_loss=1.300, val_loss_step=0.161][0m
[34m#015Validating:  70%|██████▉   | 60/86 [00:05<00:02, 10.54it/s]#033[A#015Epoch 4:  99%|█████████▊| 1590/1614 [02:55<00:02,  9.06it/s, loss=0.777, v_num=0, val_loss_epoch=0.185, train_loss=1.300, val_loss_step=0.161][0m
[34m#015Validating: 100%|██████████| 86/86 [00:08<00:00, 10.59it/s]#033[A#015Epoch 4: 100%|██████████| 1614/1614 [02:57<00:00,  9.07it/s, loss=0.607, v_num=0, val_loss_epoch=0.185, train_loss=0.166, val_loss_step=0.149][0m
[34m#015                                                           #033[A#015Epoch 4:   0%|          | 0/1614 [00:00<?, ?it/s, loss=0.607, v_num=0, val_loss_epoch=0.185, train_loss=0.166, val_loss_step=0.149]           #015Epoch 5:   0%|          | 0/1614 [00:00<?, ?it/s, loss=0.607, v_num=0, val_loss_epoch=0.185, train_lo

[34m#015Validating:  35%|███▍      | 30/86 [00:02<00:05, 10.64it/s]#033[A#015Epoch 5:  97%|█████████▋| 1560/1614 [02:52<00:05,  9.03it/s, loss=0.883, v_num=0, val_loss_epoch=0.185, train_loss=0.266, val_loss_step=0.149][0m
[34m#015Validating:  70%|██████▉   | 60/86 [00:05<00:02, 10.44it/s]#033[A#015Epoch 5:  99%|█████████▊| 1590/1614 [02:55<00:02,  9.04it/s, loss=0.883, v_num=0, val_loss_epoch=0.185, train_loss=0.266, val_loss_step=0.149][0m
[34m#015Validating: 100%|██████████| 86/86 [00:08<00:00, 10.48it/s]#033[A#015Epoch 5: 100%|██████████| 1614/1614 [02:58<00:00,  9.05it/s, loss=0.767, v_num=0, val_loss_epoch=0.183, train_loss=1.370, val_loss_step=0.126][0m
[34m#015                                                           #033[A#015Epoch 5: 100%|██████████| 1614/1614 [03:09<00:00,  8.52it/s, loss=0.767, v_num=0, val_loss_epoch=0.183, train_loss=1.370, val_loss_step=0.126]#015Validation sanity check: 0it [00:00, ?it/s]#015Validation sanity check:   0%|          | 0/2 [00:00<?

[34m#015                                                           #033[A#015Epoch 0:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.36, v_num=1, val_loss_epoch=0.190, train_loss=0.941, val_loss_step=0.181]           #015Epoch 1:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.36, v_num=1, val_loss_epoch=0.190, train_loss=0.941, val_loss_step=0.181]#015Epoch 1:   2%|▏         | 30/1614 [00:03<03:11,  8.26it/s, loss=1.36, v_num=1, val_loss_epoch=0.190, train_loss=0.941, val_loss_step=0.181]#015Epoch 1:   2%|▏         | 30/1614 [00:03<03:11,  8.26it/s, loss=1.23, v_num=1, val_loss_epoch=0.190, train_loss=0.494, val_loss_step=0.181]#015Epoch 1:   4%|▎         | 60/1614 [00:06<02:57,  8.74it/s, loss=1.23, v_num=1, val_loss_epoch=0.190, train_loss=0.494, val_loss_step=0.181]#015Epoch 1:   4%|▎         | 60/1614 [00:06<02:57,  8.74it/s, loss=1.05, v_num=1, val_loss_epoch=0.190, train_loss=0.623, val_loss_step=0.181]#015Epoch 1:   6%|▌         | 90/1614 [00:10<02:52,  8.81it/s, loss=1.05, v

[34m#015                                                           #033[A#015Epoch 1:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.08, v_num=1, val_loss_epoch=0.172, train_loss=0.342, val_loss_step=0.114]           #015Epoch 2:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.08, v_num=1, val_loss_epoch=0.172, train_loss=0.342, val_loss_step=0.114]#015Epoch 2:   2%|▏         | 30/1614 [00:03<03:06,  8.51it/s, loss=1.08, v_num=1, val_loss_epoch=0.172, train_loss=0.342, val_loss_step=0.114]#015Epoch 2:   2%|▏         | 30/1614 [00:03<03:06,  8.50it/s, loss=1.01, v_num=1, val_loss_epoch=0.172, train_loss=1.640, val_loss_step=0.114]#015Epoch 2:   4%|▎         | 60/1614 [00:06<02:58,  8.70it/s, loss=1.01, v_num=1, val_loss_epoch=0.172, train_loss=1.640, val_loss_step=0.114]#015Epoch 2:   4%|▎         | 60/1614 [00:06<02:58,  8.70it/s, loss=1.22, v_num=1, val_loss_epoch=0.172, train_loss=0.992, val_loss_step=0.114]#015Epoch 2:   6%|▌         | 90/1614 [00:10<02:51,  8.88it/s, loss=1.22, v

[34m#015                                                           #033[A#015Epoch 2:   0%|          | 0/1614 [00:00<?, ?it/s, loss=0.786, v_num=1, val_loss_epoch=0.177, train_loss=0.263, val_loss_step=0.0891]           #015Epoch 3:   0%|          | 0/1614 [00:00<?, ?it/s, loss=0.786, v_num=1, val_loss_epoch=0.177, train_loss=0.263, val_loss_step=0.0891]#015Epoch 3:   2%|▏         | 30/1614 [00:03<03:12,  8.24it/s, loss=0.786, v_num=1, val_loss_epoch=0.177, train_loss=0.263, val_loss_step=0.0891]#015Epoch 3:   2%|▏         | 30/1614 [00:03<03:12,  8.24it/s, loss=0.927, v_num=1, val_loss_epoch=0.177, train_loss=0.671, val_loss_step=0.0891]#015Epoch 3:   4%|▎         | 60/1614 [00:06<02:58,  8.71it/s, loss=0.927, v_num=1, val_loss_epoch=0.177, train_loss=0.671, val_loss_step=0.0891]#015Epoch 3:   4%|▎         | 60/1614 [00:06<02:58,  8.71it/s, loss=0.75, v_num=1, val_loss_epoch=0.177, train_loss=0.308, val_loss_step=0.0891] #015Epoch 3:   6%|▌         | 90/1614 [00:10<02:53,  8.79it/s, 

[34m#015                                                           #033[A#015Epoch 3:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.03, v_num=1, val_loss_epoch=0.165, train_loss=0.162, val_loss_step=0.101]           #015Epoch 4:   0%|          | 0/1614 [00:00<?, ?it/s, loss=1.03, v_num=1, val_loss_epoch=0.165, train_loss=0.162, val_loss_step=0.101]#015Epoch 4:   2%|▏         | 30/1614 [00:03<03:04,  8.57it/s, loss=1.03, v_num=1, val_loss_epoch=0.165, train_loss=0.162, val_loss_step=0.101]#015Epoch 4:   2%|▏         | 30/1614 [00:03<03:04,  8.57it/s, loss=0.874, v_num=1, val_loss_epoch=0.165, train_loss=0.440, val_loss_step=0.101]#015Epoch 4:   4%|▎         | 60/1614 [00:06<02:57,  8.74it/s, loss=0.874, v_num=1, val_loss_epoch=0.165, train_loss=0.440, val_loss_step=0.101]#015Epoch 4:   4%|▎         | 60/1614 [00:06<02:57,  8.74it/s, loss=0.835, v_num=1, val_loss_epoch=0.165, train_loss=1.290, val_loss_step=0.101]#015Epoch 4:   6%|▌         | 90/1614 [00:10<02:53,  8.80it/s, loss=0.83

[34m#015Validating:  35%|███▍      | 30/86 [00:02<00:05, 10.70it/s]#033[A#015Epoch 4:  97%|█████████▋| 1560/1614 [02:52<00:05,  9.06it/s, loss=1.06, v_num=1, val_loss_epoch=0.165, train_loss=0.754, val_loss_step=0.101][0m
[34m#015Validating:  70%|██████▉   | 60/86 [00:05<00:02, 10.45it/s]#033[A#015Epoch 4:  99%|█████████▊| 1590/1614 [02:55<00:02,  9.07it/s, loss=1.06, v_num=1, val_loss_epoch=0.165, train_loss=0.754, val_loss_step=0.101][0m
[34m#015Validating: 100%|██████████| 86/86 [00:08<00:00, 10.50it/s]#033[A#015Epoch 4: 100%|██████████| 1614/1614 [02:57<00:00,  9.08it/s, loss=0.862, v_num=1, val_loss_epoch=0.166, train_loss=0.537, val_loss_step=0.095][0m
[34m#015                                                           #033[A#015Epoch 4:   0%|          | 0/1614 [00:00<?, ?it/s, loss=0.862, v_num=1, val_loss_epoch=0.166, train_loss=0.537, val_loss_step=0.095]           #015Epoch 5:   0%|          | 0/1614 [00:00<?, ?it/s, loss=0.862, v_num=1, val_loss_epoch=0.166, train_loss

[34m#015Validating:  35%|███▍      | 30/86 [00:02<00:05, 10.66it/s]#033[A#015Epoch 5:  97%|█████████▋| 1560/1614 [02:52<00:05,  9.03it/s, loss=0.661, v_num=1, val_loss_epoch=0.166, train_loss=0.420, val_loss_step=0.095][0m
[34m#015Validating:  70%|██████▉   | 60/86 [00:05<00:02, 10.44it/s]#033[A#015Epoch 5:  99%|█████████▊| 1590/1614 [02:55<00:02,  9.05it/s, loss=0.661, v_num=1, val_loss_epoch=0.166, train_loss=0.420, val_loss_step=0.095][0m
[34m#015Validating: 100%|██████████| 86/86 [00:08<00:00, 10.49it/s]#033[A#015Epoch 5: 100%|██████████| 1614/1614 [02:58<00:00,  9.06it/s, loss=0.57, v_num=1, val_loss_epoch=0.167, train_loss=1.110, val_loss_step=0.0941][0m
[34m#015                                                           #033[A#015Epoch 5: 100%|██████████| 1614/1614 [03:15<00:00,  8.25it/s, loss=0.57, v_num=1, val_loss_epoch=0.167, train_loss=1.110, val_loss_step=0.0941]#015Validation sanity check: 0it [00:00, ?it/s]#015Validation sanity check:   0%|          | 0/2 [00:00<?

[34mss_epoch=0.704, train_loss=1.250]#015Epoch 0:  64%|██████▍   | 1890/2950 [03:29<01:57,  9.02it/s, loss=1.14, v_num=0, val_loss_epoch=0.704, train_loss=1.130]#015Epoch 0:  65%|██████▌   | 1920/2950 [03:32<01:54,  9.02it/s, loss=1.14, v_num=0, val_loss_epoch=0.704, train_loss=1.130]#015Epoch 0:  65%|██████▌   | 1920/2950 [03:32<01:54,  9.02it/s, loss=1.08, v_num=0, val_loss_epoch=0.704, train_loss=1.400]#015Epoch 0:  66%|██████▌   | 1950/2950 [03:36<01:50,  9.01it/s, loss=1.08, v_num=0, val_loss_epoch=0.704, train_loss=1.400]#015Epoch 0:  66%|██████▌   | 1950/2950 [03:36<01:50,  9.01it/s, loss=1.05, v_num=0, val_loss_epoch=0.704, train_loss=1.010]#015Epoch 0:  67%|██████▋   | 1980/2950 [03:39<01:47,  9.02it/s, loss=1.05, v_num=0, val_loss_epoch=0.704, train_loss=1.010]#015Epoch 0:  67%|██████▋   | 1980/2950 [03:39<01:47,  9.02it/s, loss=1, v_num=0, val_loss_epoch=0.704, train_loss=1.150]   #015Epoch 0:  68%|██████▊   | 2010/2950 [03:42<01:44,  9.01it/s, loss=1, v_num=0, val_loss_epo

[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.42it/s]#033[A#015Epoch 0:  99%|█████████▊| 2910/2950 [05:17<00:04,  9.15it/s, loss=0.947, v_num=0, val_loss_epoch=0.704, train_loss=1.130][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.20it/s]#033[A#015Epoch 0: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.16it/s, loss=0.947, v_num=0, val_loss_epoch=0.704, train_loss=1.130][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.27it/s]#033[A[0m
[34m#015Validating: 100%|██████████| 160/160 [00:15<00:00, 10.39it/s]#033[A#015Epoch 0: 100%|██████████| 2950/2950 [05:24<00:00,  9.08it/s, loss=0.947, v_num=0, val_loss_epoch=0.128, train_loss=0.613, val_loss_step=0.145][0m
[34m#015                                                             #033[A#015Epoch 0:   0%|          | 0/2950 [00:00<?, ?it/s, loss=0.947, v_num=0, val_loss_epoch=0.128, train_loss=0.613, val_loss_step=0.145]           #015Epoch 1:   0%|          | 0/2950 [00:00<?, ?it/s, loss=

[34m.145]#015Epoch 1:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.02it/s, loss=0.752, v_num=0, val_loss_epoch=0.128, train_loss=0.855, val_loss_step=0.145]#015Epoch 1:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.02it/s, loss=0.714, v_num=0, val_loss_epoch=0.128, train_loss=0.506, val_loss_step=0.145]#015Epoch 1:  57%|█████▋    | 1680/2950 [03:06<02:20,  9.01it/s, loss=0.714, v_num=0, val_loss_epoch=0.128, train_loss=0.506, val_loss_step=0.145]#015Epoch 1:  57%|█████▋    | 1680/2950 [03:06<02:20,  9.01it/s, loss=0.749, v_num=0, val_loss_epoch=0.128, train_loss=0.475, val_loss_step=0.145]#015Epoch 1:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.749, v_num=0, val_loss_epoch=0.128, train_loss=0.475, val_loss_step=0.145]#015Epoch 1:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.827, v_num=0, val_loss_epoch=0.128, train_loss=1.510, val_loss_step=0.145]#015Epoch 1:  59%|█████▉    | 1740/2950 [03:12<02:14,  9.02it/s, loss=0.827, v_num=0, val_loss_epoch=0.128, train_

[34m#015Validating:  19%|█▉        | 30/160 [00:02<00:12, 10.28it/s]#033[A#015Epoch 1:  97%|█████████▋| 2850/2950 [05:12<00:10,  9.12it/s, loss=0.697, v_num=0, val_loss_epoch=0.128, train_loss=0.625, val_loss_step=0.145][0m
[34m#015Validating:  38%|███▊      | 60/160 [00:05<00:09, 10.36it/s]#033[A#015Epoch 1:  98%|█████████▊| 2880/2950 [05:15<00:07,  9.14it/s, loss=0.697, v_num=0, val_loss_epoch=0.128, train_loss=0.625, val_loss_step=0.145][0m
[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.27it/s]#033[A#015Epoch 1:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.14it/s, loss=0.697, v_num=0, val_loss_epoch=0.128, train_loss=0.625, val_loss_step=0.145][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.09it/s]#033[A#015Epoch 1: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.15it/s, loss=0.697, v_num=0, val_loss_epoch=0.128, train_loss=0.625, val_loss_step=0.145][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.16it/s]#033[A[0m
[34m#015Valid

[34mep=0.130]#015Epoch 2:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.01it/s, loss=0.606, v_num=0, val_loss_epoch=0.105, train_loss=0.458, val_loss_step=0.130]#015Epoch 2:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.01it/s, loss=0.569, v_num=0, val_loss_epoch=0.105, train_loss=0.332, val_loss_step=0.130]#015Epoch 2:  57%|█████▋    | 1680/2950 [03:06<02:21,  9.01it/s, loss=0.569, v_num=0, val_loss_epoch=0.105, train_loss=0.332, val_loss_step=0.130]#015Epoch 2:  57%|█████▋    | 1680/2950 [03:06<02:21,  9.01it/s, loss=0.522, v_num=0, val_loss_epoch=0.105, train_loss=0.518, val_loss_step=0.130]#015Epoch 2:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.522, v_num=0, val_loss_epoch=0.105, train_loss=0.518, val_loss_step=0.130]#015Epoch 2:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.648, v_num=0, val_loss_epoch=0.105, train_loss=0.473, val_loss_step=0.130]#015Epoch 2:  59%|█████▉    | 1740/2950 [03:13<02:14,  9.01it/s, loss=0.648, v_num=0, val_loss_epoch=0.105, tr

[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.33it/s]#033[A#015Epoch 2:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.14it/s, loss=0.649, v_num=0, val_loss_epoch=0.105, train_loss=1.030, val_loss_step=0.130][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.12it/s]#033[A#015Epoch 2: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.15it/s, loss=0.649, v_num=0, val_loss_epoch=0.105, train_loss=1.030, val_loss_step=0.130][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.15it/s]#033[A[0m
[34m#015Validating: 100%|██████████| 160/160 [00:15<00:00, 10.24it/s]#033[A#015Epoch 2: 100%|██████████| 2950/2950 [05:25<00:00,  9.07it/s, loss=0.649, v_num=0, val_loss_epoch=0.0965, train_loss=1.350, val_loss_step=0.121][0m
[34m#015                                                             #033[A#015Epoch 2:   0%|          | 0/2950 [00:00<?, ?it/s, loss=0.649, v_num=0, val_loss_epoch=0.0965, train_loss=1.350, val_loss_step=0.121]           #015Epoch 3:   

[34m  | 1620/2950 [02:59<02:27,  9.01it/s, loss=0.542, v_num=0, val_loss_epoch=0.0965, train_loss=1.060, val_loss_step=0.121]#015Epoch 3:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.01it/s, loss=0.542, v_num=0, val_loss_epoch=0.0965, train_loss=1.060, val_loss_step=0.121]#015Epoch 3:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.01it/s, loss=0.447, v_num=0, val_loss_epoch=0.0965, train_loss=0.414, val_loss_step=0.121]#015Epoch 3:  57%|█████▋    | 1680/2950 [03:06<02:20,  9.01it/s, loss=0.447, v_num=0, val_loss_epoch=0.0965, train_loss=0.414, val_loss_step=0.121]#015Epoch 3:  57%|█████▋    | 1680/2950 [03:06<02:20,  9.01it/s, loss=0.51, v_num=0, val_loss_epoch=0.0965, train_loss=0.437, val_loss_step=0.121] #015Epoch 3:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.51, v_num=0, val_loss_epoch=0.0965, train_loss=0.437, val_loss_step=0.121]#015Epoch 3:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.431, v_num=0, val_loss_epoch=0.0965, train_loss=0.486, val_loss_step

[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.49it/s]#033[A#015Epoch 3:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.13it/s, loss=0.534, v_num=0, val_loss_epoch=0.0965, train_loss=0.777, val_loss_step=0.121][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.25it/s]#033[A#015Epoch 3: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.14it/s, loss=0.534, v_num=0, val_loss_epoch=0.0965, train_loss=0.777, val_loss_step=0.121][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.33it/s]#033[A[0m
[34m#015Validating: 100%|██████████| 160/160 [00:15<00:00, 10.43it/s]#033[A#015Epoch 3: 100%|██████████| 2950/2950 [05:25<00:00,  9.06it/s, loss=0.534, v_num=0, val_loss_epoch=0.0916, train_loss=0.147, val_loss_step=0.146][0m
[34m#015                                                             #033[A#015Epoch 3:   0%|          | 0/2950 [00:00<?, ?it/s, loss=0.534, v_num=0, val_loss_epoch=0.0916, train_loss=0.147, val_loss_step=0.146]           #015Epoch 4: 

[34m  | 1620/2950 [02:59<02:27,  9.01it/s, loss=0.372, v_num=0, val_loss_epoch=0.0916, train_loss=0.119, val_loss_step=0.146]#015Epoch 4:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.01it/s, loss=0.372, v_num=0, val_loss_epoch=0.0916, train_loss=0.119, val_loss_step=0.146]#015Epoch 4:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.01it/s, loss=0.451, v_num=0, val_loss_epoch=0.0916, train_loss=0.728, val_loss_step=0.146]#015Epoch 4:  57%|█████▋    | 1680/2950 [03:06<02:20,  9.01it/s, loss=0.451, v_num=0, val_loss_epoch=0.0916, train_loss=0.728, val_loss_step=0.146]#015Epoch 4:  57%|█████▋    | 1680/2950 [03:06<02:20,  9.01it/s, loss=0.431, v_num=0, val_loss_epoch=0.0916, train_loss=0.659, val_loss_step=0.146]#015Epoch 4:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.431, v_num=0, val_loss_epoch=0.0916, train_loss=0.659, val_loss_step=0.146]#015Epoch 4:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.443, v_num=0, val_loss_epoch=0.0916, train_loss=0.492, val_loss_ste

[34m#015Validating:  19%|█▉        | 30/160 [00:02<00:12, 10.51it/s]#033[A#015Epoch 4:  97%|█████████▋| 2850/2950 [05:12<00:10,  9.12it/s, loss=0.415, v_num=0, val_loss_epoch=0.0916, train_loss=0.347, val_loss_step=0.146][0m
[34m#015Validating:  38%|███▊      | 60/160 [00:05<00:09, 10.58it/s]#033[A#015Epoch 4:  98%|█████████▊| 2880/2950 [05:15<00:07,  9.14it/s, loss=0.415, v_num=0, val_loss_epoch=0.0916, train_loss=0.347, val_loss_step=0.146][0m
[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.50it/s]#033[A#015Epoch 4:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.15it/s, loss=0.415, v_num=0, val_loss_epoch=0.0916, train_loss=0.347, val_loss_step=0.146][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.29it/s]#033[A#015Epoch 4: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.15it/s, loss=0.415, v_num=0, val_loss_epoch=0.0916, train_loss=0.347, val_loss_step=0.146][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.35it/s]#033[A[0m
[34m#015V

[34moss_step=0.114]#015Epoch 5:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.01it/s, loss=0.395, v_num=0, val_loss_epoch=0.091, train_loss=0.361, val_loss_step=0.114]#015Epoch 5:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.01it/s, loss=0.42, v_num=0, val_loss_epoch=0.091, train_loss=0.212, val_loss_step=0.114] #015Epoch 5:  57%|█████▋    | 1680/2950 [03:06<02:20,  9.01it/s, loss=0.42, v_num=0, val_loss_epoch=0.091, train_loss=0.212, val_loss_step=0.114]#015Epoch 5:  57%|█████▋    | 1680/2950 [03:06<02:20,  9.01it/s, loss=0.387, v_num=0, val_loss_epoch=0.091, train_loss=0.161, val_loss_step=0.114]#015Epoch 5:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.387, v_num=0, val_loss_epoch=0.091, train_loss=0.161, val_loss_step=0.114]#015Epoch 5:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.01it/s, loss=0.334, v_num=0, val_loss_epoch=0.091, train_loss=0.283, val_loss_step=0.114]#015Epoch 5:  59%|█████▉    | 1740/2950 [03:13<02:14,  9.01it/s, loss=0.334, v_num=0, val_loss_epoch=0.09

[34m#015Validating:  19%|█▉        | 30/160 [00:02<00:12, 10.52it/s]#033[A#015Epoch 5:  97%|█████████▋| 2850/2950 [05:12<00:10,  9.12it/s, loss=0.376, v_num=0, val_loss_epoch=0.091, train_loss=0.571, val_loss_step=0.114][0m
[34m#015Validating:  38%|███▊      | 60/160 [00:05<00:09, 10.58it/s]#033[A#015Epoch 5:  98%|█████████▊| 2880/2950 [05:15<00:07,  9.13it/s, loss=0.376, v_num=0, val_loss_epoch=0.091, train_loss=0.571, val_loss_step=0.114][0m
[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.47it/s]#033[A#015Epoch 5:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.14it/s, loss=0.376, v_num=0, val_loss_epoch=0.091, train_loss=0.571, val_loss_step=0.114][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.28it/s]#033[A#015Epoch 5: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.15it/s, loss=0.376, v_num=0, val_loss_epoch=0.091, train_loss=0.571, val_loss_step=0.114][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.33it/s]#033[A[0m
[34m#015Valid

[34mum=1, val_loss_epoch=0.680, train_loss=0.565]#015Epoch 0:  64%|██████▍   | 1890/2950 [03:29<01:57,  9.02it/s, loss=1.02, v_num=1, val_loss_epoch=0.680, train_loss=0.685] #015Epoch 0:  65%|██████▌   | 1920/2950 [03:32<01:54,  9.02it/s, loss=1.02, v_num=1, val_loss_epoch=0.680, train_loss=0.685]#015Epoch 0:  65%|██████▌   | 1920/2950 [03:32<01:54,  9.02it/s, loss=0.932, v_num=1, val_loss_epoch=0.680, train_loss=0.857]#015Epoch 0:  66%|██████▌   | 1950/2950 [03:36<01:50,  9.02it/s, loss=0.932, v_num=1, val_loss_epoch=0.680, train_loss=0.857]#015Epoch 0:  66%|██████▌   | 1950/2950 [03:36<01:50,  9.02it/s, loss=0.851, v_num=1, val_loss_epoch=0.680, train_loss=0.855]#015Epoch 0:  67%|██████▋   | 1980/2950 [03:39<01:47,  9.02it/s, loss=0.851, v_num=1, val_loss_epoch=0.680, train_loss=0.855]#015Epoch 0:  67%|██████▋   | 1980/2950 [03:39<01:47,  9.02it/s, loss=0.973, v_num=1, val_loss_epoch=0.680, train_loss=1.600]#015Epoch 0:  68%|██████▊   | 2010/2950 [03:42<01:44,  9.02it/s, loss=0.973,

[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.41it/s]#033[A#015Epoch 0:  99%|█████████▊| 2910/2950 [05:17<00:04,  9.15it/s, loss=0.868, v_num=1, val_loss_epoch=0.680, train_loss=0.735][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.21it/s]#033[A#015Epoch 0: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.16it/s, loss=0.868, v_num=1, val_loss_epoch=0.680, train_loss=0.735][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.27it/s]#033[A[0m
[34m#015Validating: 100%|██████████| 160/160 [00:15<00:00, 10.40it/s]#033[A#015Epoch 0: 100%|██████████| 2950/2950 [05:24<00:00,  9.08it/s, loss=0.868, v_num=1, val_loss_epoch=0.120, train_loss=1.190, val_loss_step=0.0857][0m
[34m#015                                                             #033[A#015Epoch 0:   0%|          | 0/2950 [00:00<?, ?it/s, loss=0.868, v_num=1, val_loss_epoch=0.120, train_loss=1.190, val_loss_step=0.0857]           #015Epoch 1:   0%|          | 0/2950 [00:00<?, ?it/s, los

[34m��    | 1620/2950 [03:00<02:27,  9.00it/s, loss=0.754, v_num=1, val_loss_epoch=0.120, train_loss=0.506, val_loss_step=0.0857]#015Epoch 1:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.00it/s, loss=0.754, v_num=1, val_loss_epoch=0.120, train_loss=0.506, val_loss_step=0.0857]#015Epoch 1:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.00it/s, loss=0.512, v_num=1, val_loss_epoch=0.120, train_loss=0.631, val_loss_step=0.0857]#015Epoch 1:  57%|█████▋    | 1680/2950 [03:06<02:21,  9.00it/s, loss=0.512, v_num=1, val_loss_epoch=0.120, train_loss=0.631, val_loss_step=0.0857]#015Epoch 1:  57%|█████▋    | 1680/2950 [03:06<02:21,  9.00it/s, loss=0.788, v_num=1, val_loss_epoch=0.120, train_loss=1.050, val_loss_step=0.0857]#015Epoch 1:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.00it/s, loss=0.788, v_num=1, val_loss_epoch=0.120, train_loss=1.050, val_loss_step=0.0857]#015Epoch 1:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.00it/s, loss=0.739, v_num=1, val_loss_epoch=0.120, train_loss=0.979, val_loss_

[34m#015Validating:  19%|█▉        | 30/160 [00:02<00:12, 10.28it/s]#033[A#015Epoch 1:  97%|█████████▋| 2850/2950 [05:12<00:10,  9.12it/s, loss=0.634, v_num=1, val_loss_epoch=0.120, train_loss=0.642, val_loss_step=0.0857][0m
[34m#015Validating:  38%|███▊      | 60/160 [00:05<00:09, 10.36it/s]#033[A#015Epoch 1:  98%|█████████▊| 2880/2950 [05:15<00:07,  9.13it/s, loss=0.634, v_num=1, val_loss_epoch=0.120, train_loss=0.642, val_loss_step=0.0857][0m
[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.26it/s]#033[A#015Epoch 1:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.14it/s, loss=0.634, v_num=1, val_loss_epoch=0.120, train_loss=0.642, val_loss_step=0.0857][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.02it/s]#033[A#015Epoch 1: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.15it/s, loss=0.634, v_num=1, val_loss_epoch=0.120, train_loss=0.642, val_loss_step=0.0857][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.10it/s]#033[A[0m
[34m#015V

[34m | 1620/2950 [03:00<02:28,  8.98it/s, loss=0.545, v_num=1, val_loss_epoch=0.101, train_loss=0.555, val_loss_step=0.0689]#015Epoch 2:  56%|█████▌    | 1650/2950 [03:03<02:24,  8.99it/s, loss=0.545, v_num=1, val_loss_epoch=0.101, train_loss=0.555, val_loss_step=0.0689]#015Epoch 2:  56%|█████▌    | 1650/2950 [03:03<02:24,  8.99it/s, loss=0.55, v_num=1, val_loss_epoch=0.101, train_loss=0.689, val_loss_step=0.0689] #015Epoch 2:  57%|█████▋    | 1680/2950 [03:06<02:21,  8.99it/s, loss=0.55, v_num=1, val_loss_epoch=0.101, train_loss=0.689, val_loss_step=0.0689]#015Epoch 2:  57%|█████▋    | 1680/2950 [03:06<02:21,  8.99it/s, loss=0.57, v_num=1, val_loss_epoch=0.101, train_loss=0.419, val_loss_step=0.0689]#015Epoch 2:  58%|█████▊    | 1710/2950 [03:10<02:17,  8.99it/s, loss=0.57, v_num=1, val_loss_epoch=0.101, train_loss=0.419, val_loss_step=0.0689]#015Epoch 2:  58%|█████▊    | 1710/2950 [03:10<02:17,  8.99it/s, loss=0.579, v_num=1, val_loss_epoch=0.101, train_loss=0.695, val_loss_step=0.0

[34m#015                                                             #033[A#015Epoch 2:   0%|          | 0/2950 [00:00<?, ?it/s, loss=0.508, v_num=1, val_loss_epoch=0.0921, train_loss=0.639, val_loss_step=0.0567]           #015Epoch 3:   0%|          | 0/2950 [00:00<?, ?it/s, loss=0.508, v_num=1, val_loss_epoch=0.0921, train_loss=0.639, val_loss_step=0.0567]#015Epoch 3:   1%|          | 30/2950 [00:03<06:00,  8.10it/s, loss=0.508, v_num=1, val_loss_epoch=0.0921, train_loss=0.639, val_loss_step=0.0567]#015Epoch 3:   1%|          | 30/2950 [00:03<06:00,  8.10it/s, loss=0.447, v_num=1, val_loss_epoch=0.0921, train_loss=0.300, val_loss_step=0.0567]#015Epoch 3:   2%|▏         | 60/2950 [00:06<05:35,  8.63it/s, loss=0.447, v_num=1, val_loss_epoch=0.0921, train_loss=0.300, val_loss_step=0.0567]#015Epoch 3:   2%|▏         | 60/2950 [00:06<05:35,  8.62it/s, loss=0.461, v_num=1, val_loss_epoch=0.0921, train_loss=0.466, val_loss_step=0.0567]#015Epoch 3:   3%|▎         | 90/2950 [00:10<05:27,  8.

[34m9, v_num=1, val_loss_epoch=0.0921, train_loss=0.582, val_loss_step=0.0567]#015Epoch 3:  55%|█████▍    | 1620/2950 [03:00<02:27,  8.99it/s, loss=0.564, v_num=1, val_loss_epoch=0.0921, train_loss=0.557, val_loss_step=0.0567]#015Epoch 3:  56%|█████▌    | 1650/2950 [03:03<02:24,  8.99it/s, loss=0.564, v_num=1, val_loss_epoch=0.0921, train_loss=0.557, val_loss_step=0.0567]#015Epoch 3:  56%|█████▌    | 1650/2950 [03:03<02:24,  8.99it/s, loss=0.516, v_num=1, val_loss_epoch=0.0921, train_loss=0.516, val_loss_step=0.0567]#015Epoch 3:  57%|█████▋    | 1680/2950 [03:06<02:21,  8.99it/s, loss=0.516, v_num=1, val_loss_epoch=0.0921, train_loss=0.516, val_loss_step=0.0567]#015Epoch 3:  57%|█████▋    | 1680/2950 [03:06<02:21,  8.99it/s, loss=0.516, v_num=1, val_loss_epoch=0.0921, train_loss=0.304, val_loss_step=0.0567]#015Epoch 3:  58%|█████▊    | 1710/2950 [03:10<02:17,  8.99it/s, loss=0.516, v_num=1, val_loss_epoch=0.0921, train_loss=0.304, val_loss_step=0.0567]#015Epoch 3:  58%|█████▊    | 171

[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.39it/s]#033[A#015Epoch 3:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.13it/s, loss=0.487, v_num=1, val_loss_epoch=0.0921, train_loss=0.199, val_loss_step=0.0567][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.21it/s]#033[A#015Epoch 3: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.14it/s, loss=0.487, v_num=1, val_loss_epoch=0.0921, train_loss=0.199, val_loss_step=0.0567][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.21it/s]#033[A[0m
[34m#015Validating: 100%|██████████| 160/160 [00:15<00:00, 10.37it/s]#033[A#015Epoch 3: 100%|██████████| 2950/2950 [05:25<00:00,  9.06it/s, loss=0.487, v_num=1, val_loss_epoch=0.0901, train_loss=0.957, val_loss_step=0.0602][0m
[34m#015                                                             #033[A#015Epoch 3:   0%|          | 0/2950 [00:00<?, ?it/s, loss=0.487, v_num=1, val_loss_epoch=0.0901, train_loss=0.957, val_loss_step=0.0602]           #015Epoch

[34m v_num=1, val_loss_epoch=0.0901, train_loss=0.424, val_loss_step=0.0602]#015Epoch 4:  55%|█████▍    | 1620/2950 [02:59<02:27,  9.01it/s, loss=0.332, v_num=1, val_loss_epoch=0.0901, train_loss=0.586, val_loss_step=0.0602]#015Epoch 4:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.00it/s, loss=0.332, v_num=1, val_loss_epoch=0.0901, train_loss=0.586, val_loss_step=0.0602]#015Epoch 4:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.00it/s, loss=0.324, v_num=1, val_loss_epoch=0.0901, train_loss=0.627, val_loss_step=0.0602]#015Epoch 4:  57%|█████▋    | 1680/2950 [03:06<02:21,  9.00it/s, loss=0.324, v_num=1, val_loss_epoch=0.0901, train_loss=0.627, val_loss_step=0.0602]#015Epoch 4:  57%|█████▋    | 1680/2950 [03:06<02:21,  9.00it/s, loss=0.395, v_num=1, val_loss_epoch=0.0901, train_loss=0.554, val_loss_step=0.0602]#015Epoch 4:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.00it/s, loss=0.395, v_num=1, val_loss_epoch=0.0901, train_loss=0.554, val_loss_step=0.0602]#015Epoch 4:  58%|█████▊    | 1710/

[34m#015Validating:  19%|█▉        | 30/160 [00:02<00:12, 10.39it/s]#033[A#015Epoch 4:  97%|█████████▋| 2850/2950 [05:12<00:10,  9.11it/s, loss=0.379, v_num=1, val_loss_epoch=0.0901, train_loss=0.538, val_loss_step=0.0602][0m
[34m#015Validating:  38%|███▊      | 60/160 [00:05<00:09, 10.48it/s]#033[A#015Epoch 4:  98%|█████████▊| 2880/2950 [05:15<00:07,  9.12it/s, loss=0.379, v_num=1, val_loss_epoch=0.0901, train_loss=0.538, val_loss_step=0.0602][0m
[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.41it/s]#033[A#015Epoch 4:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.13it/s, loss=0.379, v_num=1, val_loss_epoch=0.0901, train_loss=0.538, val_loss_step=0.0602][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.22it/s]#033[A#015Epoch 4: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.14it/s, loss=0.379, v_num=1, val_loss_epoch=0.0901, train_loss=0.538, val_loss_step=0.0602][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.28it/s]#033[A[0m
[34m#

[34mnum=1, val_loss_epoch=0.0916, train_loss=0.284, val_loss_step=0.0585]#015Epoch 5:  55%|█████▍    | 1620/2950 [02:59<02:27,  9.00it/s, loss=0.342, v_num=1, val_loss_epoch=0.0916, train_loss=0.431, val_loss_step=0.0585]#015Epoch 5:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.00it/s, loss=0.342, v_num=1, val_loss_epoch=0.0916, train_loss=0.431, val_loss_step=0.0585]#015Epoch 5:  56%|█████▌    | 1650/2950 [03:03<02:24,  9.00it/s, loss=0.368, v_num=1, val_loss_epoch=0.0916, train_loss=0.252, val_loss_step=0.0585]#015Epoch 5:  57%|█████▋    | 1680/2950 [03:06<02:21,  9.00it/s, loss=0.368, v_num=1, val_loss_epoch=0.0916, train_loss=0.252, val_loss_step=0.0585]#015Epoch 5:  57%|█████▋    | 1680/2950 [03:06<02:21,  9.00it/s, loss=0.341, v_num=1, val_loss_epoch=0.0916, train_loss=0.364, val_loss_step=0.0585]#015Epoch 5:  58%|█████▊    | 1710/2950 [03:09<02:17,  9.00it/s, loss=0.341, v_num=1, val_loss_epoch=0.0916, train_loss=0.364, val_loss_step=0.0585]#015Epoch 5:  58%|█████▊    | 1710/295

[34m#015Validating:  19%|█▉        | 30/160 [00:02<00:12, 10.24it/s]#033[A#015Epoch 5:  97%|█████████▋| 2850/2950 [05:12<00:10,  9.11it/s, loss=0.356, v_num=1, val_loss_epoch=0.0916, train_loss=0.116, val_loss_step=0.0585][0m
[34m#015Validating:  38%|███▊      | 60/160 [00:05<00:09, 10.35it/s]#033[A#015Epoch 5:  98%|█████████▊| 2880/2950 [05:15<00:07,  9.13it/s, loss=0.356, v_num=1, val_loss_epoch=0.0916, train_loss=0.116, val_loss_step=0.0585][0m
[34m#015Validating:  56%|█████▋    | 90/160 [00:08<00:06, 10.26it/s]#033[A#015Epoch 5:  99%|█████████▊| 2910/2950 [05:18<00:04,  9.14it/s, loss=0.356, v_num=1, val_loss_epoch=0.0916, train_loss=0.116, val_loss_step=0.0585][0m
[34m#015Validating:  75%|███████▌  | 120/160 [00:11<00:03, 10.07it/s]#033[A#015Epoch 5: 100%|█████████▉| 2940/2950 [05:21<00:01,  9.14it/s, loss=0.356, v_num=1, val_loss_epoch=0.0916, train_loss=0.116, val_loss_step=0.0585][0m
[34m#015Validating:  94%|█████████▍| 150/160 [00:14<00:00, 10.14it/s]#033[A[0m
[34m#

[34m#015Validating:  38%|███▊      | 30/78 [00:02<00:04, 10.65it/s]#033[A#015Epoch 0:  97%|█████████▋| 1410/1448 [02:34<00:04,  9.10it/s, loss=1.33, v_num=0, val_loss_epoch=0.697, train_loss=0.809][0m
[34m#015Validating:  77%|███████▋  | 60/78 [00:05<00:01, 10.50it/s]#033[A#015Epoch 0:  99%|█████████▉| 1440/1448 [02:37<00:00,  9.12it/s, loss=1.33, v_num=0, val_loss_epoch=0.697, train_loss=0.809][0m
[34m#015Validating: 100%|██████████| 78/78 [00:07<00:00, 10.51it/s]#033[A#015Epoch 0: 100%|██████████| 1448/1448 [02:39<00:00,  9.07it/s, loss=1.34, v_num=0, val_loss_epoch=0.263, train_loss=1.260, val_loss_step=0.268][0m
[34m#015                                                           #033[A#015Epoch 0:   0%|          | 0/1448 [00:00<?, ?it/s, loss=1.34, v_num=0, val_loss_epoch=0.263, train_loss=1.260, val_loss_step=0.268]           #015Epoch 1:   0%|          | 0/1448 [00:00<?, ?it/s, loss=1.34, v_num=0, val_loss_epoch=0.263, train_loss=1.260, val_loss_step=0.268]#015Epoch 1:   2%

[34m#015Validating:  38%|███▊      | 30/78 [00:02<00:04, 10.52it/s]#033[A#015Epoch 1:  97%|█████████▋| 1410/1448 [02:35<00:04,  9.09it/s, loss=1.09, v_num=0, val_loss_epoch=0.263, train_loss=1.420, val_loss_step=0.268][0m
[34m#015Validating:  77%|███████▋  | 60/78 [00:05<00:01, 10.39it/s]#033[A#015Epoch 1:  99%|█████████▉| 1440/1448 [02:38<00:00,  9.11it/s, loss=1.09, v_num=0, val_loss_epoch=0.263, train_loss=1.420, val_loss_step=0.268][0m
[34m#015Validating: 100%|██████████| 78/78 [00:07<00:00, 10.39it/s]#033[A#015Epoch 1: 100%|██████████| 1448/1448 [02:39<00:00,  9.06it/s, loss=0.946, v_num=0, val_loss_epoch=0.223, train_loss=0.547, val_loss_step=0.213][0m
[34m#015                                                           #033[A#015Epoch 1:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.946, v_num=0, val_loss_epoch=0.223, train_loss=0.547, val_loss_step=0.213]           #015Epoch 2:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.946, v_num=0, val_loss_epoch=0.223, train_loss

[34m#015                                                           #033[A#015Epoch 2:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.94, v_num=0, val_loss_epoch=0.209, train_loss=0.564, val_loss_step=0.205]           #015Epoch 3:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.94, v_num=0, val_loss_epoch=0.209, train_loss=0.564, val_loss_step=0.205]#015Epoch 3:   2%|▏         | 30/1448 [00:03<02:46,  8.52it/s, loss=0.94, v_num=0, val_loss_epoch=0.209, train_loss=0.564, val_loss_step=0.205]#015Epoch 3:   2%|▏         | 30/1448 [00:03<02:46,  8.52it/s, loss=0.829, v_num=0, val_loss_epoch=0.209, train_loss=0.428, val_loss_step=0.205]#015Epoch 3:   4%|▍         | 60/1448 [00:06<02:39,  8.70it/s, loss=0.829, v_num=0, val_loss_epoch=0.209, train_loss=0.428, val_loss_step=0.205]#015Epoch 3:   4%|▍         | 60/1448 [00:06<02:39,  8.70it/s, loss=0.829, v_num=0, val_loss_epoch=0.209, train_loss=0.760, val_loss_step=0.205]#015Epoch 3:   6%|▌         | 90/1448 [00:10<02:34,  8.78it/s, loss=0.82

[34m#015                                                           #033[A#015Epoch 3:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.628, v_num=0, val_loss_epoch=0.218, train_loss=0.764, val_loss_step=0.216]           #015Epoch 4:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.628, v_num=0, val_loss_epoch=0.218, train_loss=0.764, val_loss_step=0.216]#015Epoch 4:   2%|▏         | 30/1448 [00:03<02:52,  8.23it/s, loss=0.628, v_num=0, val_loss_epoch=0.218, train_loss=0.764, val_loss_step=0.216]#015Epoch 4:   2%|▏         | 30/1448 [00:03<02:52,  8.23it/s, loss=0.636, v_num=0, val_loss_epoch=0.218, train_loss=0.881, val_loss_step=0.216]#015Epoch 4:   4%|▍         | 60/1448 [00:06<02:39,  8.68it/s, loss=0.636, v_num=0, val_loss_epoch=0.218, train_loss=0.881, val_loss_step=0.216]#015Epoch 4:   4%|▍         | 60/1448 [00:06<02:39,  8.68it/s, loss=0.728, v_num=0, val_loss_epoch=0.218, train_loss=0.905, val_loss_step=0.216]#015Epoch 4:   6%|▌         | 90/1448 [00:10<02:34,  8.77it/s, loss=0

[34m#015Validating:  38%|███▊      | 30/78 [00:02<00:04, 10.58it/s]#033[A#015Epoch 4:  97%|█████████▋| 1410/1448 [02:34<00:04,  9.10it/s, loss=0.652, v_num=0, val_loss_epoch=0.218, train_loss=0.628, val_loss_step=0.216][0m
[34m#015Validating:  77%|███████▋  | 60/78 [00:05<00:01, 10.45it/s]#033[A#015Epoch 4:  99%|█████████▉| 1440/1448 [02:37<00:00,  9.12it/s, loss=0.652, v_num=0, val_loss_epoch=0.218, train_loss=0.628, val_loss_step=0.216][0m
[34m#015Validating: 100%|██████████| 78/78 [00:07<00:00, 10.47it/s]#033[A#015Epoch 4: 100%|██████████| 1448/1448 [02:39<00:00,  9.07it/s, loss=0.751, v_num=0, val_loss_epoch=0.216, train_loss=1.120, val_loss_step=0.196][0m
[34m#015                                                           #033[A#015Epoch 4:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.751, v_num=0, val_loss_epoch=0.216, train_loss=1.120, val_loss_step=0.196]           #015Epoch 5:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.751, v_num=0, val_loss_epoch=0.216, train_lo

[34m#015                                                           #033[A#015Epoch 5: 100%|██████████| 1448/1448 [02:56<00:00,  8.20it/s, loss=0.645, v_num=0, val_loss_epoch=0.218, train_loss=0.605, val_loss_step=0.186]#015Validation sanity check: 0it [00:00, ?it/s]#015Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]#015                                                              #015#015Training: 99it [00:00, ?it/s]#015Training:   0%|          | 0/1448 [00:00<?, ?it/s]#015Epoch 0:   0%|          | 0/1448 [00:00<?, ?it/s] #015Epoch 0:   2%|▏         | 30/1448 [00:03<02:48,  8.40it/s]#015Epoch 0:   2%|▏         | 30/1448 [00:03<02:48,  8.40it/s, loss=1.81, v_num=1, val_loss_epoch=0.700, train_loss=1.320]#015Epoch 0:   4%|▍         | 60/1448 [00:06<02:40,  8.63it/s, loss=1.81, v_num=1, val_loss_epoch=0.700, train_loss=1.320]#015Epoch 0:   4%|▍         | 60/1448 [00:06<02:40,  8.63it/s, loss=1.81, v_num=1, val_loss_epoch=0.700, train_loss=1.190]#015Epoch 0:   6%|▌         

[34m#015Validating:  38%|███▊      | 30/78 [00:02<00:04, 10.65it/s]#033[A#015Epoch 0:  97%|█████████▋| 1410/1448 [02:34<00:04,  9.10it/s, loss=1.33, v_num=1, val_loss_epoch=0.700, train_loss=1.850][0m
[34m#015Validating:  77%|███████▋  | 60/78 [00:05<00:01, 10.49it/s]#033[A#015Epoch 0:  99%|█████████▉| 1440/1448 [02:37<00:00,  9.12it/s, loss=1.33, v_num=1, val_loss_epoch=0.700, train_loss=1.850][0m
[34m#015Validating: 100%|██████████| 78/78 [00:07<00:00, 10.50it/s]#033[A#015Epoch 0: 100%|██████████| 1448/1448 [02:39<00:00,  9.07it/s, loss=1.12, v_num=1, val_loss_epoch=0.256, train_loss=0.884, val_loss_step=0.278][0m
[34m#015                                                           #033[A#015Epoch 0:   0%|          | 0/1448 [00:00<?, ?it/s, loss=1.12, v_num=1, val_loss_epoch=0.256, train_loss=0.884, val_loss_step=0.278]           #015Epoch 1:   0%|          | 0/1448 [00:00<?, ?it/s, loss=1.12, v_num=1, val_loss_epoch=0.256, train_loss=0.884, val_loss_step=0.278]#015Epoch 1:   2%

[34m#015Validating:  38%|███▊      | 30/78 [00:02<00:04, 10.67it/s]#033[A#015Epoch 1:  97%|█████████▋| 1410/1448 [02:34<00:04,  9.10it/s, loss=1.02, v_num=1, val_loss_epoch=0.256, train_loss=0.823, val_loss_step=0.278][0m
[34m#015Validating:  77%|███████▋  | 60/78 [00:05<00:01, 10.50it/s]#033[A#015Epoch 1:  99%|█████████▉| 1440/1448 [02:37<00:00,  9.12it/s, loss=1.02, v_num=1, val_loss_epoch=0.256, train_loss=0.823, val_loss_step=0.278][0m
[34m#015Validating: 100%|██████████| 78/78 [00:07<00:00, 10.51it/s]#033[A#015Epoch 1: 100%|██████████| 1448/1448 [02:39<00:00,  9.07it/s, loss=0.958, v_num=1, val_loss_epoch=0.224, train_loss=0.598, val_loss_step=0.173][0m
[34m#015                                                           #033[A#015Epoch 1:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.958, v_num=1, val_loss_epoch=0.224, train_loss=0.598, val_loss_step=0.173]           #015Epoch 2:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.958, v_num=1, val_loss_epoch=0.224, train_loss

[34m#015                                                           #033[A#015Epoch 2:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.861, v_num=1, val_loss_epoch=0.218, train_loss=0.966, val_loss_step=0.164]           #015Epoch 3:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.861, v_num=1, val_loss_epoch=0.218, train_loss=0.966, val_loss_step=0.164]#015Epoch 3:   2%|▏         | 30/1448 [00:03<02:47,  8.45it/s, loss=0.861, v_num=1, val_loss_epoch=0.218, train_loss=0.966, val_loss_step=0.164]#015Epoch 3:   2%|▏         | 30/1448 [00:03<02:47,  8.45it/s, loss=0.839, v_num=1, val_loss_epoch=0.218, train_loss=1.210, val_loss_step=0.164]#015Epoch 3:   4%|▍         | 60/1448 [00:06<02:39,  8.68it/s, loss=0.839, v_num=1, val_loss_epoch=0.218, train_loss=1.210, val_loss_step=0.164]#015Epoch 3:   4%|▍         | 60/1448 [00:06<02:39,  8.68it/s, loss=0.717, v_num=1, val_loss_epoch=0.218, train_loss=0.460, val_loss_step=0.164]#015Epoch 3:   6%|▌         | 90/1448 [00:10<02:34,  8.77it/s, loss=0

[34m#015                                                           #033[A#015Epoch 3:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.896, v_num=1, val_loss_epoch=0.205, train_loss=0.614, val_loss_step=0.144]           #015Epoch 4:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.896, v_num=1, val_loss_epoch=0.205, train_loss=0.614, val_loss_step=0.144]#015Epoch 4:   2%|▏         | 30/1448 [00:03<02:53,  8.15it/s, loss=0.896, v_num=1, val_loss_epoch=0.205, train_loss=0.614, val_loss_step=0.144]#015Epoch 4:   2%|▏         | 30/1448 [00:03<02:53,  8.15it/s, loss=0.743, v_num=1, val_loss_epoch=0.205, train_loss=0.535, val_loss_step=0.144]#015Epoch 4:   4%|▍         | 60/1448 [00:06<02:40,  8.65it/s, loss=0.743, v_num=1, val_loss_epoch=0.205, train_loss=0.535, val_loss_step=0.144]#015Epoch 4:   4%|▍         | 60/1448 [00:06<02:40,  8.65it/s, loss=0.701, v_num=1, val_loss_epoch=0.205, train_loss=0.725, val_loss_step=0.144]#015Epoch 4:   6%|▌         | 90/1448 [00:10<02:35,  8.73it/s, loss=0

[34m#015Validating:  38%|███▊      | 30/78 [00:02<00:04, 10.63it/s]#033[A#015Epoch 4:  97%|█████████▋| 1410/1448 [02:35<00:04,  9.09it/s, loss=0.721, v_num=1, val_loss_epoch=0.205, train_loss=0.646, val_loss_step=0.144][0m
[34m#015Validating:  77%|███████▋  | 60/78 [00:05<00:01, 10.46it/s]#033[A#015Epoch 4:  99%|█████████▉| 1440/1448 [02:38<00:00,  9.10it/s, loss=0.721, v_num=1, val_loss_epoch=0.205, train_loss=0.646, val_loss_step=0.144][0m
[34m#015Validating: 100%|██████████| 78/78 [00:07<00:00, 10.49it/s]#033[A#015Epoch 4: 100%|██████████| 1448/1448 [02:39<00:00,  9.05it/s, loss=0.664, v_num=1, val_loss_epoch=0.212, train_loss=0.569, val_loss_step=0.146][0m
[34m#015                                                           #033[A#015Epoch 4:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.664, v_num=1, val_loss_epoch=0.212, train_loss=0.569, val_loss_step=0.146]           #015Epoch 5:   0%|          | 0/1448 [00:00<?, ?it/s, loss=0.664, v_num=1, val_loss_epoch=0.212, train_lo

[34m#015Validating:  38%|███▊      | 30/78 [00:02<00:04, 10.62it/s]#033[A#015Epoch 5:  97%|█████████▋| 1410/1448 [02:35<00:04,  9.09it/s, loss=0.612, v_num=1, val_loss_epoch=0.212, train_loss=0.583, val_loss_step=0.146][0m
[34m#015Validating:  77%|███████▋  | 60/78 [00:05<00:01, 10.45it/s]#033[A#015Epoch 5:  99%|█████████▉| 1440/1448 [02:38<00:00,  9.10it/s, loss=0.612, v_num=1, val_loss_epoch=0.212, train_loss=0.583, val_loss_step=0.146][0m
[34m#015Validating: 100%|██████████| 78/78 [00:07<00:00, 10.48it/s]#033[A#015Epoch 5: 100%|██████████| 1448/1448 [02:39<00:00,  9.05it/s, loss=0.66, v_num=1, val_loss_epoch=0.211, train_loss=0.554, val_loss_step=0.148] [0m
[34m#015                                                           #033[A#015Epoch 5: 100%|██████████| 1448/1448 [02:56<00:00,  8.19it/s, loss=0.66, v_num=1, val_loss_epoch=0.211, train_loss=0.554, val_loss_step=0.148]#015Validation sanity check: 0it [00:00, ?it/s]#015Validation sanity check:   0%|          | 0/2 [00:00<?,

[34m#015                                                           #033[A#015Epoch 1:   0%|          | 0/629 [00:00<?, ?it/s, loss=1.18, v_num=0, val_loss_epoch=0.209, train_loss=0.768, val_loss_step=0.232]          #015Epoch 2:   0%|          | 0/629 [00:00<?, ?it/s, loss=1.18, v_num=0, val_loss_epoch=0.209, train_loss=0.768, val_loss_step=0.232]#015Epoch 2:   5%|▍         | 30/629 [00:03<01:12,  8.25it/s, loss=1.18, v_num=0, val_loss_epoch=0.209, train_loss=0.768, val_loss_step=0.232]#015Epoch 2:   5%|▍         | 30/629 [00:03<01:12,  8.25it/s, loss=1.19, v_num=0, val_loss_epoch=0.209, train_loss=1.410, val_loss_step=0.232]#015Epoch 2:  10%|▉         | 60/629 [00:06<01:05,  8.74it/s, loss=1.19, v_num=0, val_loss_epoch=0.209, train_loss=1.410, val_loss_step=0.232]#015Epoch 2:  10%|▉         | 60/629 [00:06<01:05,  8.74it/s, loss=1.28, v_num=0, val_loss_epoch=0.209, train_loss=1.100, val_loss_step=0.232]#015Epoch 2:  14%|█▍        | 90/629 [00:10<01:01,  8.79it/s, loss=1.28, v_num=0, 

[34m#015                                                           #033[A#015Epoch 3:   0%|          | 0/629 [00:00<?, ?it/s, loss=1.05, v_num=0, val_loss_epoch=0.196, train_loss=1.060, val_loss_step=0.209]          #015Epoch 4:   0%|          | 0/629 [00:00<?, ?it/s, loss=1.05, v_num=0, val_loss_epoch=0.196, train_loss=1.060, val_loss_step=0.209]#015Epoch 4:   5%|▍         | 30/629 [00:03<01:12,  8.25it/s, loss=1.05, v_num=0, val_loss_epoch=0.196, train_loss=1.060, val_loss_step=0.209]#015Epoch 4:   5%|▍         | 30/629 [00:03<01:12,  8.25it/s, loss=0.926, v_num=0, val_loss_epoch=0.196, train_loss=0.720, val_loss_step=0.209]#015Epoch 4:  10%|▉         | 60/629 [00:06<01:05,  8.73it/s, loss=0.926, v_num=0, val_loss_epoch=0.196, train_loss=0.720, val_loss_step=0.209]#015Epoch 4:  10%|▉         | 60/629 [00:06<01:05,  8.73it/s, loss=0.966, v_num=0, val_loss_epoch=0.196, train_loss=0.834, val_loss_step=0.209]#015Epoch 4:  14%|█▍        | 90/629 [00:10<01:01,  8.80it/s, loss=0.966, v_num

[34m#015                                                           #033[A#015Epoch 5: 100%|██████████| 629/629 [01:21<00:00,  7.73it/s, loss=0.717, v_num=0, val_loss_epoch=0.195, train_loss=0.344, val_loss_step=0.192]#015Validation sanity check: 0it [00:00, ?it/s]#015Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]#015                                                              #015#015Training: 99it [00:00, ?it/s]#015Training:   0%|          | 0/629 [00:00<?, ?it/s]#015Epoch 0:   0%|          | 0/629 [00:00<?, ?it/s] #015Epoch 0:   5%|▍         | 30/629 [00:03<01:10,  8.50it/s]#015Epoch 0:   5%|▍         | 30/629 [00:03<01:10,  8.49it/s, loss=1.98, v_num=1, val_loss_epoch=0.703, train_loss=2.120]#015Epoch 0:  10%|▉         | 60/629 [00:06<01:06,  8.58it/s, loss=1.98, v_num=1, val_loss_epoch=0.703, train_loss=2.120]#015Epoch 0:  10%|▉         | 60/629 [00:06<01:06,  8.58it/s, loss=2.06, v_num=1, val_loss_epoch=0.703, train_loss=1.370]#015Epoch 0:  14%|█▍        | 90/629

[34m#015                                                           #033[A#015Epoch 1:   0%|          | 0/629 [00:00<?, ?it/s, loss=1.91, v_num=1, val_loss_epoch=0.315, train_loss=1.220, val_loss_step=0.364]          #015Epoch 2:   0%|          | 0/629 [00:00<?, ?it/s, loss=1.91, v_num=1, val_loss_epoch=0.315, train_loss=1.220, val_loss_step=0.364]#015Epoch 2:   5%|▍         | 30/629 [00:03<01:13,  8.16it/s, loss=1.91, v_num=1, val_loss_epoch=0.315, train_loss=1.220, val_loss_step=0.364]#015Epoch 2:   5%|▍         | 30/629 [00:03<01:13,  8.16it/s, loss=1.87, v_num=1, val_loss_epoch=0.315, train_loss=2.660, val_loss_step=0.364]#015Epoch 2:  10%|▉         | 60/629 [00:06<01:05,  8.69it/s, loss=1.87, v_num=1, val_loss_epoch=0.315, train_loss=2.660, val_loss_step=0.364]#015Epoch 2:  10%|▉         | 60/629 [00:06<01:05,  8.69it/s, loss=2.01, v_num=1, val_loss_epoch=0.315, train_loss=1.870, val_loss_step=0.364]#015Epoch 2:  14%|█▍        | 90/629 [00:10<01:01,  8.76it/s, loss=2.01, v_num=1, 

[34m#015                                                           #033[A#015Epoch 3:   0%|          | 0/629 [00:00<?, ?it/s, loss=1.77, v_num=1, val_loss_epoch=0.315, train_loss=0.998, val_loss_step=0.347]          #015Epoch 4:   0%|          | 0/629 [00:00<?, ?it/s, loss=1.77, v_num=1, val_loss_epoch=0.315, train_loss=0.998, val_loss_step=0.347]#015Epoch 4:   5%|▍         | 30/629 [00:03<01:12,  8.23it/s, loss=1.77, v_num=1, val_loss_epoch=0.315, train_loss=0.998, val_loss_step=0.347]#015Epoch 4:   5%|▍         | 30/629 [00:03<01:12,  8.23it/s, loss=2.22, v_num=1, val_loss_epoch=0.315, train_loss=1.970, val_loss_step=0.347]#015Epoch 4:  10%|▉         | 60/629 [00:06<01:05,  8.69it/s, loss=2.22, v_num=1, val_loss_epoch=0.315, train_loss=1.970, val_loss_step=0.347]#015Epoch 4:  10%|▉         | 60/629 [00:06<01:05,  8.68it/s, loss=1.93, v_num=1, val_loss_epoch=0.315, train_loss=2.250, val_loss_step=0.347]#015Epoch 4:  14%|█▍        | 90/629 [00:10<01:01,  8.75it/s, loss=1.93, v_num=1, 

[34m#015                                                           #033[A#015Epoch 5: 100%|██████████| 629/629 [01:16<00:00,  8.26it/s, loss=1.82, v_num=1, val_loss_epoch=0.305, train_loss=2.300, val_loss_step=0.353]#015Epoch 5: 100%|██████████| 7585/7585 [15:22:43<00:00,  7.30s/it, loss=0.546, v_num=1, val_loss_epoch=0.118, train_loss=0.616, val_loss_step=0.0833][0m
[34m#015Epoch 5: 100%|██████████| 5888/5888 [5:57:49<00:00,  3.65s/it, loss=0.472, v_num=1, val_loss_epoch=0.164, train_loss=0.391, val_loss_step=0.0212][0m
[34m#015Epoch 5: 100%|██████████| 5888/5888 [7:09:10<00:00,  4.37s/it, loss=0.511, v_num=0, val_loss_epoch=0.187, train_loss=0.346, val_loss_step=0.0501][0m
[34m#015Epoch 5: 100%|██████████| 1448/1448 [43:07<00:00,  1.79s/it, loss=0.645, v_num=0, val_loss_epoch=0.218, train_loss=0.605, val_loss_step=0.186][0m
[34m#015Epoch 4: 100%|██████████| 12538/12538 [11:40:33<00:00,  3.35s/it, loss=0.286, v_num=1, val_loss_epoch=0.446, train_loss=0.303, val_loss_step=2.14

[34m#015 48%|████▊     | 315/659 [00:36<00:41,  8.38it/s]#033[A[0m
[34m#015 48%|████▊     | 316/659 [00:36<00:41,  8.35it/s]#033[A[0m
[34m#015 48%|████▊     | 317/659 [00:36<00:41,  8.29it/s]#033[A[0m
[34m#015 48%|████▊     | 318/659 [00:36<00:40,  8.32it/s]#033[A[0m
[34m#015 48%|████▊     | 319/659 [00:37<00:42,  8.05it/s]#033[A[0m
[34m#015 49%|████▊     | 320/659 [00:37<00:41,  8.13it/s]#033[A[0m
[34m#015 49%|████▊     | 321/659 [00:37<00:41,  8.15it/s]#033[A[0m
[34m#015 49%|████▉     | 322/659 [00:37<00:41,  8.14it/s]#033[A[0m
[34m#015 49%|████▉     | 323/659 [00:37<00:41,  8.09it/s]#033[A[0m
[34m#015 49%|████▉     | 324/659 [00:37<00:41,  8.16it/s]#033[A[0m
[34m#015 49%|████▉     | 325/659 [00:37<00:40,  8.16it/s]#033[A[0m
[34m#015 49%|████▉     | 326/659 [00:37<00:40,  8.14it/s]#033[A[0m
[34m#015 50%|████▉     | 327/659 [00:38<00:40,  8.20it/s]#033[A[0m
[34m#015 50%|████▉     | 328/659 [00:38<00:40,  8.26it/s]#033[A[0m
[34m#015 50%|████▉     | 329/659 

[34m#015 60%|█████▉    | 395/659 [00:46<00:31,  8.38it/s]#033[A[0m
[34m#015 60%|██████    | 396/659 [00:46<00:31,  8.39it/s]#033[A[0m
[34m#015 60%|██████    | 397/659 [00:46<00:30,  8.45it/s]#033[A[0m
[34m#015 60%|██████    | 398/659 [00:46<00:30,  8.48it/s]#033[A[0m
[34m#015 61%|██████    | 399/659 [00:46<00:30,  8.51it/s]#033[A[0m
[34m#015 61%|██████    | 400/659 [00:46<00:30,  8.41it/s]#033[A[0m
[34m#015 61%|██████    | 401/659 [00:47<00:30,  8.33it/s]#033[A[0m
[34m#015 61%|██████    | 402/659 [00:47<00:32,  8.02it/s]#033[A[0m
[34m#015 61%|██████    | 403/659 [00:47<00:31,  8.09it/s]#033[A[0m
[34m#015 61%|██████▏   | 404/659 [00:47<00:31,  8.17it/s]#033[A[0m
[34m#015 61%|██████▏   | 405/659 [00:47<00:30,  8.20it/s]#033[A[0m
[34m#015 62%|██████▏   | 406/659 [00:47<00:31,  8.09it/s]#033[A[0m
[34m#015 62%|██████▏   | 407/659 [00:47<00:31,  8.10it/s]#033[A[0m
[34m#015 62%|██████▏   | 408/659 [00:47<00:31,  8.00it/s]#033[A[0m
[34m#015 62%|██████▏   | 409/659 

[34m#015 26%|██▌       | 51/197 [00:05<00:16,  8.89it/s]#033[A[0m
[34m#015 26%|██▋       | 52/197 [00:05<00:16,  8.90it/s]#033[A[0m
[34m#015 27%|██▋       | 53/197 [00:05<00:16,  8.93it/s]#033[A[0m
[34m#015 27%|██▋       | 54/197 [00:06<00:16,  8.80it/s]#033[A[0m
[34m#015 28%|██▊       | 55/197 [00:06<00:16,  8.81it/s]#033[A[0m
[34m#015 28%|██▊       | 56/197 [00:06<00:16,  8.71it/s]#033[A[0m
[34m#015 29%|██▉       | 57/197 [00:06<00:15,  8.81it/s]#033[A[0m
[34m#015 29%|██▉       | 58/197 [00:06<00:15,  8.87it/s]#033[A[0m
[34m#015 30%|██▉       | 59/197 [00:06<00:15,  8.95it/s]#033[A[0m
[34m#015 30%|███       | 60/197 [00:06<00:15,  8.98it/s]#033[A[0m
[34m#015 31%|███       | 61/197 [00:06<00:15,  8.97it/s]#033[A[0m
[34m#015 31%|███▏      | 62/197 [00:07<00:15,  8.91it/s]#033[A[0m
[34m#015 32%|███▏      | 63/197 [00:07<00:15,  8.87it/s]#033[A[0m
[34m#015 32%|███▏      | 64/197 [00:07<00:15,  8.84it/s]#033[A[0m
[34m#015 33%|███▎      | 65/197 [00:07<00:14,  

[34mon_validation_epoch_start          #011|  1.3092e-05     #011|1              #011|  1.3092e-05     #011|  0.00051868     #011|[0m
[34mon_epoch_start                     #011|  1.3015e-05     #011|1              #011|  1.3015e-05     #011|  0.00051563     #011|[0m
[34mon_epoch_end                       #011|  1.292e-05      #011|1              #011|  1.292e-05      #011|  0.00051186     #011|[0m
[34mon_validation_end                  #011|  1.164e-05      #011|1              #011|  1.164e-05      #011|  0.00046115     #011|[0m
[34m#015Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]#033[A[0m
[34m#015Finding best initial lr:   1%|          | 1/100 [00:00<00:11,  8.71it/s]#033[A[0m
[34m#015Finding best initial lr:   3%|▎         | 3/100 [00:00<00:10,  9.11it/s]#033[A[0m
[34m#015Finding best initial lr:   4%|▍         | 4/100 [00:00<00:10,  8.91it/s]#033[A[0m
[34m#015Finding best initial lr:   6%|▌         | 6/100 [00:00<00:10,  9.13it/s]#033[A[0m
[34

[34mLOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0][0m
  "`Trainer.running_sanity_check` has been renamed to `Trainer.sanity_checking` and will be removed in v1.5."[0m
[34m#015                                                                          #015#033[AEpoch 0, global step 1528: val_loss reached 0.16441 (best 0.16441), saving model to "/opt/ml/model/model_need_intervention_risk.ckpt" as top 1[0m
[34mEpoch 1, global step 3056: val_loss was not in top 1[0m
[34mEpoch 2, global step 4584: val_loss reached 0.15778 (best 0.15778), saving model to "/opt/ml/model/model_need_intervention_risk.ckpt" as top 1[0m
[34mEpoch 3, global step 6112: val_loss was not in top 1[0m
[34mEpoch 4, global step 7640: val_loss reached 0.14897 (best 0.14897), saving model to "/opt/ml/model/model_need_intervention_risk.ckpt" as top 1[0m
[34mEpoch 5, global step 9168: val_loss reached 0.12580 (best 0.12580), saving model to "/opt/ml/model/model_need_intervention_risk.ckpt" as top 1[0m
[34m#015  0%| 

[34m#015 11%|█▏        | 4/35 [00:00<00:03,  8.50it/s]#033[A[0m
[34m#015 14%|█▍        | 5/35 [00:00<00:03,  8.54it/s]#033[A[0m
[34m#015 17%|█▋        | 6/35 [00:00<00:03,  8.57it/s]#033[A[0m
[34m#015 20%|██        | 7/35 [00:00<00:03,  8.63it/s]#033[A[0m
[34m#015 23%|██▎       | 8/35 [00:00<00:03,  8.45it/s]#033[A[0m
[34m#015 26%|██▌       | 9/35 [00:01<00:03,  8.50it/s]#033[A[0m
[34m#015 29%|██▊       | 10/35 [00:01<00:02,  8.57it/s]#033[A[0m
[34m#015 31%|███▏      | 11/35 [00:01<00:02,  8.65it/s]#033[A[0m
[34m#015 34%|███▍      | 12/35 [00:01<00:02,  8.71it/s]#033[A[0m
[34m#015 37%|███▋      | 13/35 [00:01<00:02,  8.75it/s]#033[A[0m
[34m#015 40%|████      | 14/35 [00:01<00:02,  8.76it/s]#033[A[0m
[34m#015 43%|████▎     | 15/35 [00:01<00:02,  8.69it/s]#033[A[0m
[34m#015 46%|████▌     | 16/35 [00:01<00:02,  8.64it/s]#033[A[0m
[34m#015 49%|████▊     | 17/35 [00:01<00:02,  8.60it/s]#033[A[0m
[34m#015 51%|█████▏    | 18/35 [00:02<00:02,  8.37it/s]#033[A[0m


[34m#015 73%|███████▎  | 195/268 [00:22<00:08,  8.71it/s]#033[A[0m
[34m#015 73%|███████▎  | 196/268 [00:22<00:08,  8.80it/s]#033[A[0m
[34m#015 74%|███████▎  | 197/268 [00:22<00:08,  8.70it/s]#033[A[0m
[34m#015 74%|███████▍  | 198/268 [00:22<00:08,  8.69it/s]#033[A[0m
[34m#015 74%|███████▍  | 199/268 [00:22<00:07,  8.74it/s]#033[A[0m
[34m#015 75%|███████▍  | 200/268 [00:22<00:07,  8.74it/s]#033[A[0m
[34m#015 75%|███████▌  | 201/268 [00:22<00:07,  8.84it/s]#033[A[0m
[34m#015 75%|███████▌  | 202/268 [00:22<00:07,  8.85it/s]#033[A[0m
[34m#015 76%|███████▌  | 203/268 [00:22<00:07,  8.89it/s]#033[A[0m
[34m#015 76%|███████▌  | 204/268 [00:23<00:07,  8.96it/s]#033[A[0m
[34m#015 76%|███████▋  | 205/268 [00:23<00:06,  9.02it/s]#033[A[0m
[34m#015 77%|███████▋  | 206/268 [00:23<00:06,  8.99it/s]#033[A[0m
[34m#015 77%|███████▋  | 207/268 [00:23<00:06,  8.97it/s]#033[A[0m
[34m#015 78%|███████▊  | 208/268 [00:23<00:06,  8.89it/s]#033[A[0m
[34m#015 78%|███████▊  | 209/268 


2021-11-18 06:23:03 Uploading - Uploading generated training modelProfilerReport-1637152154: IssuesFound

2021-11-18 06:33:50 Completed - Training job completed
Training seconds: 64875
Billable seconds: 64875
