# Import

These requirements are necessary if you launch this notebook from SageMaker instances

In [1]:
"""!pip install mlflow
!pip install pytorch-lightning
!pip install transformers
!pip install tqdm
!pip install sagemaker

!pip install s3fs
!pip install smdebug"""

'!pip install mlflow\n!pip install pytorch-lightning\n!pip install transformers\n!pip install tqdm\n!pip install sagemaker\n\n!pip install s3fs\n!pip install smdebug'

In [2]:
import sys
sys.path.append('../../../')

import os
import sys
import logging
import argparse
from pathlib import Path
from typing import Any, Dict, Optional

In [3]:
from tqdm.auto import tqdm

import torchmetrics
from torchmetrics.functional import accuracy, f1, auroc

import sagemaker
from sagemaker import get_execution_role
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.core.decorators import auto_move_data
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import MLFlowLogger


import matplotlib.pyplot as plt
from pylab import rcParams
from matplotlib import rc

import pandas as pd

Local constants for the data, the MLFlow server, paths, etc. Use them wherever possible.

In [4]:
from deep.constants import *
from deep.utils import *

In [5]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell is executed, so edits to the local `deep` package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Data

You can use any data you want. We recommend loading it as a `pandas` DataFrame.

In [6]:
# Folder that holds the generated v0.7 dataset, relative to this notebook.
DATA_PATH = os.path.join(
    os.path.join('..', '..', '..'),
    "data", "frameworks_data", 'data_v0.7', 'generated_dataset',
)

# Disabled code, kept inside a string literal so that it is never executed:
# direct loading of the original and the augmented CSV files.
"""
original_df = pd.read_csv(os.path.join(DATA_PATH, 'full_dataset.csv'))
augmented_data = pd.read_csv(os.path.join(DATA_PATH, 'generated_text.csv'))"""

# Train/validation data: the first CSV column is used as the index.
tot_df = pd.read_csv(
    filepath_or_buffer=os.path.join(DATA_PATH, 'train_val_all.csv'),
    index_col=0,
)
# Test data: read with the default integer index.
test_df = pd.read_csv(
    filepath_or_buffer=os.path.join(DATA_PATH, 'test_v0.7.csv'),
)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [7]:
# Preview the first five rows of the test frame.
test_df.head(n=5)

Unnamed: 0,entry_id,excerpt,analysis_framework_id,lead_id,project_id,verified,sectors,subpillars_2d,subpillars_1d,geo_location,specific_needs_groups,severity,info_date,demographic_groups,reliability,affected_groups,source_type,url,website
0,489433,Primary and secondary net enrollment rates are...,1306,67488.0,2225.0,False,['Education'],['Humanitarian Conditions->Living Standards'],['Context->Socio Cultural'],['République démocratique du Congo'],[],['Major'],UNKNOWN,"['Children/Youth Male (5 to 17 years old)', 'C...",['Usually reliable'],['Affected'],website,https://blogs.worldbank.org/education/free-pri...,blogs.worldbank.org
1,489435,"After past, partially implemented attempts, th...",1306,67488.0,2225.0,False,['Education'],['Capacities & Response->International Respons...,['Context->Socio Cultural'],['République démocratique du Congo'],[],[],UNKNOWN,"['Children/Youth Male (5 to 17 years old)', 'C...",['Usually reliable'],['Affected'],website,https://blogs.worldbank.org/education/free-pri...,blogs.worldbank.org
2,194719,"[10th November, NW Syria] Now with the lockdow...",1306,43771.0,2028.0,False,[],[],['Covid-19->Restriction Measures'],['Syrian Arab Republic'],[],[],"['10-11-2020', None]",[],['Usually reliable'],[],website,https://reliefweb.int/report/syrian-arab-repub...,www.reliefweb.int
3,186152,El Salvador hace frente a una combinación de c...,829,41125.0,1898.0,False,"['Nutrition', 'WASH', 'Protection', 'Food Secu...",['Humanitarian Conditions->Living Standards'],[],['El Salvador'],[],['Of Concern'],"['14-10-2020', None]",[],['Usually reliable'],"['All', 'Affected']",website,https://www.acaps.org/sites/acaps/files/key-do...,https://www.acaps.org
4,186986,Las personas que habitan en cantones donde no ...,829,42489.0,1184.0,False,['Cross'],['NOT_MAPPED'],[],"['El Oro', 'Huaquillas']",[],['Of Concern'],"['01-08-2020', '31-08-2020']",[],['Usually reliable'],"['Displaced', 'Refugees', 'Migrants', 'Affected']",website,https://reliefweb.int/sites/reliefweb.int/file...,reliefweb.int


In [8]:
"""augmented_data = pd.merge(
    right=original_df.drop(columns=['excerpt']),
    left=augmented_data[['entry_id', 'excerpt']],
    on='entry_id',
    how='right'
)"""

"augmented_data = pd.merge(\n    right=original_df.drop(columns=['excerpt']),\n    left=augmented_data[['entry_id', 'excerpt']],\n    on='entry_id',\n    how='right'\n)"

In [9]:
"""tot_df = pd.concat([original_df, augmented_data])"""

'tot_df = pd.concat([original_df, augmented_data])'

In [10]:
"""from ast import literal_eval"""

'from ast import literal_eval'

In [11]:
"""tot_df['pillars_1d'] = tot_df.subpillars_1d.apply(
    lambda item: [x.split('->')[0] for x in literal_eval(item)]
)
tot_df['pillars_2d'] = tot_df.subpillars_2d.apply(
    lambda item: [x.split('->')[0] for x in literal_eval(item)]
)"""

"tot_df['pillars_1d'] = tot_df.subpillars_1d.apply(\n    lambda item: [x.split('->')[0] for x in literal_eval(item)]\n)\ntot_df['pillars_2d'] = tot_df.subpillars_2d.apply(\n    lambda item: [x.split('->')[0] for x in literal_eval(item)]\n)"

In [12]:
# Keep only the identifier columns, the text and the label columns.
# 'demographic_groups' and 'affected_groups' are currently excluded.
selected_columns = [
    'entry_id', 'excerpt', 'lead_id',
    'sectors', 'severity',
    'subpillars_1d', 'specific_needs_groups', 'subpillars_2d',
    'pillars_2d', 'pillars_1d',
]
tot_df = tot_df[selected_columns]

## Sagemaker Prep

### Session

Configure SageMaker

In [13]:
# MLFlow server address and SageMaker role identifiers
# (presumably provided by the `deep.constants` star import).
tracking_uri = MLFLOW_SERVER
role_arn = SAGEMAKER_ROLE_ARN
role = SAGEMAKER_ROLE

# SageMaker session whose default bucket is the development bucket.
sess = sagemaker.Session(default_bucket=DEV_BUCKET.name)

### Bucket upload

You need to upload data to an S3 bucket. 




In [14]:
# Display the value of MLFLOW_SERVER as a sanity check
# (presumably the MLFlow tracking server address from `deep.constants`).
MLFLOW_SERVER

'http://mlflow-deep-387470f3-1883319727.us-east-1.elb.amazonaws.com/'

In [15]:
# Set `sample = True` to train on a random subset and make the computations faster.
sample = False
SAMPLE_SIZE = 50_000  # maximum number of rows kept when sampling
SAMPLE_SEED = 42      # fixed seed, so the same subset is drawn on every run

if sample:
    # `DataFrame.sample` draws without replacement and raises a ValueError when
    # `n` is larger than the frame, hence the cap at the number of available rows.
    tot_df = tot_df.sample(
        n=min(SAMPLE_SIZE, len(tot_df)),
        random_state=SAMPLE_SEED,
    )

job_name = f"pytorch-{formatted_time()}-all-models"  # change it as you prefer
input_path = DEV_BUCKET / 'training' / 'input_data' / job_name  # Do not change this

train_path = str(input_path / 'train.pickle')
val_path = str(input_path / 'val.pickle')

# protocol 4 is necessary, since SageMaker uses python 3.6
tot_df.to_pickle(train_path, protocol=4)
# NOTE: the test dataframe is the one stored as `val.pickle`.
test_df.to_pickle(val_path, protocol=4)

### Estimator Definition

In [16]:
# GPU instance types (candidate values for `instance_type`).
instances = ['ml.p2.xlarge', 'ml.p3.2xlarge']

The hyperparameters are passed as command line arguments to the training script. 

You can add/change them as you like. It's important to keep the `tracking_uri` and the `experiment_name` which are used by MLFlow.

The class `PyTorch` is part of the `SageMaker` python API. The parameters are important and you should probably not change most of them. The ones you may want to change are:

- `instance_type`, specify the instance you want
- `source_dir`, specify your script directory. Try to use global variables as much as possible

In [17]:
from sagemaker.pytorch import PyTorch

# Per-task proportions of negative examples for the test data.
# Insertion order is kept on purpose: the dict is later serialised with `str()`.
proportions_negative_examples_test = dict(
    sectors=0.16,
    demographic_groups=0.72,
    subpillars_1d=0.57,
    pillars_1d=0.57,
    pillars_2d=0.28,
    specific_needs_groups=0.86,
    subpillars_2d=0.28,
    affected_groups=0.30,
)

# Same mapping for the training data.
proportions_negative_examples_train = dict(
    sectors=0.02,
    subpillars_2d=0.05,
    pillars_2d=0.05,
    subpillars_1d=0.57,
    pillars_1d=0.57,
    demographic_groups=0.72,
    specific_needs_groups=0.86,
    affected_groups=0.15,
)

# Instance type used for the training job.
instance_type = 'ml.p3.2xlarge'

# Hyperparameters, passed as command line arguments to the training script.
# `tracking_uri` and `experiment_name` are the ones used by MLFlow.
hyperparameters = {
    # --- MLFlow ---
    'tracking_uri': MLFLOW_SERVER,
    'experiment_name': "pl-trials",
    # --- model and optimisation ---
    'max_len': 256,
    'epochs': 3,
    'model_name': 'microsoft/xtremedistil-l6-h256-uncased',
    'tokenizer_name': 'microsoft/xtremedistil-l6-h256-uncased',
    'dropout_rate': 0.3,
    'pred_threshold': 0.4,
    'output_length': 256,
    'learning_rate': 5e-5,
    # --- tasks to train on, sent as one comma-separated string ---
    # Alternative selections, kept for reference:
    #   'sectors,subpillars_2d,pillars_2d,pillars_1d,subpillars_1d,specific_needs_groups,affected_groups,demographic_groups,severity'
    #   'subpillars_1d,specific_needs_groups,demographic_groups'
    #   'sectors,subpillars_2d,subpillars_1d,severity,specific_needs_groups,affected_groups,demographic_groups'
    'training_names': ','.join([
        'sectors', 'subpillars_2d', 'pillars_2d', 'pillars_1d',
        'subpillars_1d', 'specific_needs_groups', 'severity',
    ]),
    # Disabled option: 'train_with_all_positive_examples': True
    'model_mode': 'train',
    # The dicts are sent in their `str()` form.
    'proportions_negative_examples_test': str(proportions_negative_examples_test),
    'proportions_negative_examples_train': str(proportions_negative_examples_train),
    'instance_type': instance_type,
    'beta_f1': 0.8,
    # Disabled option: "numbers_augmentation": "with"
}

# SageMaker PyTorch estimator running `train_mlflow.py` on a single instance.
estimator = PyTorch(
    # What to run.
    source_dir='../../../scripts/training/selim/multiclass-lightning',
    entry_point='train_mlflow.py',
    hyperparameters=hyperparameters,
    # Where to run it.
    role=role,
    instance_count=1,
    instance_type=instance_type,
    framework_version="1.8",
    py_version="py36",
    # Where the code and the outputs are stored.
    code_location=str(input_path),
    output_path=str(DEV_BUCKET / 'models/'),
    job_name=job_name,
    # Disabled options, kept for reference:
    #   distribution={"smdistributed": {"dataparallel": {"enabled": False}}}
    #   train_instance_count=2,
    #   train_instance_type="ml.c4.xlarge",
)

In [18]:
# Inputs for `estimator.fit`: the 'train' and 'test' entries both point to `input_path`.
fit_arguments = dict.fromkeys(('train', 'test'), str(input_path))

In [19]:
# Launch the training job under the chosen job name; its logs are printed below.
estimator.fit(inputs=fit_arguments, job_name=job_name)

2021-10-27 09:31:37 Starting - Starting the training job...
2021-10-27 09:32:00 Starting - Launching requested ML instancesProfilerReport-1635327090: InProgress
......
2021-10-27 09:33:12 Starting - Preparing the instances for training............
2021-10-27 09:35:11 Downloading - Downloading input data
2021-10-27 09:35:11 Training - Downloading the training image..................
2021-10-27 09:38:26 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-10-27 09:38:27,390 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-10-27 09:38:27,413 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-10-27 09:38:27,422 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-10-27 09:38:27,967 sagemaker-tra

[34mCollecting boto3>=1.16.32
  Downloading boto3-1.19.4-py3-none-any.whl (131 kB)[0m
  Downloading boto3-1.19.3-py3-none-any.whl (131 kB)
  Downloading boto3-1.19.2-py3-none-any.whl (131 kB)
  Downloading boto3-1.19.1-py3-none-any.whl (131 kB)
  Downloading boto3-1.19.0-py3-none-any.whl (131 kB)[0m
[34m  Downloading boto3-1.18.65-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.64-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.62-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.61-py3-none-any.whl (131 kB)[0m
[34m  Downloading boto3-1.18.60-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.59-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.58-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.57-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.56-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.55-py3-none-any.whl (131 kB)[0m
[34m  Downloading boto3-1.18.54-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.53-py3-none-any.whl (131 kB)
  Downloading boto3-1.18.

[34mCollecting oauthlib>=3.0.0
  Downloading oauthlib-3.1.1-py2.py3-none-any.whl (146 kB)[0m
[34mBuilding wheels for collected packages: nltk, sagemaker, aiobotocore, alembic, databricks-cli, termcolor, wrapt, idna-ssl
  Building wheel for nltk (setup.py): started[0m
[34m  Building wheel for nltk (setup.py): finished with status 'done'
  Created wheel for nltk: filename=nltk-3.2.5-py3-none-any.whl size=1392142 sha256=3303cc85701ea1a6d407fe404570c72b5b0c536dcbf7a1e82cbb8b4ecf22b5b2
  Stored in directory: /root/.cache/pip/wheels/f2/7f/71/cb36468789a03b5e2908281c8e1ce093e6860258b6b61677d8
  Building wheel for sagemaker (setup.py): started
  Building wheel for sagemaker (setup.py): finished with status 'done'[0m
[34m  Created wheel for sagemaker: filename=sagemaker-2.49.1-py2.py3-none-any.whl size=591916 sha256=1c4a3c7c6f517220af8cfc63998ba96ecd93684885679c63a33dbdcc095169c8
  Stored in directory: /root/.cache/pip/wheels/4c/af/ea/8ff5943a87155df5b184e54474fbf2b59b75e5c172854643c6
  

[34m[2021-10-27 09:40:21.277 algo-1:85 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2021-10-27 09:40:21.314 algo-1:85 INFO profiler_config_parser.py:102] User has disabled profiler.[0m
[34m[2021-10-27 09:40:21.315 algo-1:85 INFO json_config.py:91] Creating hook from json_config at /opt/ml/input/config/debughookconfig.json.[0m
[34m[2021-10-27 09:40:21.315 algo-1:85 INFO hook.py:201] tensorboard_dir has not been set for the hook. SMDebug will not be exporting tensorboard summaries.[0m
[34m[2021-10-27 09:40:21.316 algo-1:85 INFO hook.py:255] Saving to /opt/ml/output/tensors[0m
[34m[2021-10-27 09:40:21.316 algo-1:85 INFO state_store.py:77] The checkpoint config file /opt/ml/input/config/checkpointconfig.json does not exist.[0m
[34m[2021-10-27 09:40:21.426 algo-1:85 INFO hook.py:594] name:model.l0.embeddings.word_embeddings.weight count_params:7813632[0m
[34m[2021-10-27 09:40:21.427 algo-1:85 INFO hook.py:594] name:model.l0.embeddings.position_embeddings.weig

[34m#015Validation sanity check: 0it [00:00, ?it/s]#015Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]#015                                                              #015#015Training: 99it [00:00, ?it/s]#015Training:   0%|          | 0/7573 [00:00<?, ?it/s]#015Epoch 0:   0%|          | 0/7573 [00:00<?, ?it/s] #015Epoch 0:   0%|          | 30/7573 [00:01<08:09, 15.40it/s]#015Epoch 0:   0%|          | 30/7573 [00:01<08:09, 15.40it/s, loss=2.62, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.519]#015Epoch 0:   1%|          | 60/7573 [00:03<07:59, 15.68it/s, loss=2.62, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.519]#015Epoch 0:   1%|          | 60/7573 [00:03<07:59, 15.67it/s, loss=2.05, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.470]#015Epoch 0:   1%|          | 90/7573 [00:05<07:47, 16.00it/s, loss=2.05, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.470]#015Epoch 0:   1%|          | 90/7573 [00:05<

[34ms, loss=1.1, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.774]  #015Epoch 0:  23%|██▎       | 1740/7573 [01:48<06:05, 15.98it/s, loss=1.1, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.774]#015Epoch 0:  23%|██▎       | 1740/7573 [01:48<06:05, 15.98it/s, loss=0.892, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.736]#015Epoch 0:  23%|██▎       | 1770/7573 [01:50<06:03, 15.98it/s, loss=0.892, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.736]#015Epoch 0:  23%|██▎       | 1770/7573 [01:50<06:03, 15.98it/s, loss=0.986, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.819]#015Epoch 0:  24%|██▍       | 1800/7573 [01:52<06:01, 15.98it/s, loss=0.986, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.819]#015Epoch 0:  24%|██▍       | 1800/7573 [01:52<06:01, 15.98it/s, loss=0.884, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.825]#015Epoch 0:  24%|██▍       | 1830/7573 [01:54<05:5

[34m_f1=0.920]#015Epoch 0:  45%|████▍     | 3390/7573 [03:29<04:18, 16.15it/s, loss=0.748, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.920]#015Epoch 0:  45%|████▍     | 3390/7573 [03:29<04:18, 16.15it/s, loss=0.859, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.825]#015Epoch 0:  45%|████▌     | 3420/7573 [03:31<04:17, 16.15it/s, loss=0.859, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.825]#015Epoch 0:  45%|████▌     | 3420/7573 [03:31<04:17, 16.15it/s, loss=0.726, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.841]#015Epoch 0:  46%|████▌     | 3450/7573 [03:33<04:15, 16.15it/s, loss=0.726, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.841]#015Epoch 0:  46%|████▌     | 3450/7573 [03:33<04:15, 16.15it/s, loss=0.874, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.863]#015Epoch 0:  46%|████▌     | 3480/7573 [03:35<04:13, 16.16it/s, loss=0.874, v_num=0, val_f1_epoch=0.0652, val_loss_epoch

[34m�█▌   | 4980/7573 [05:07<02:40, 16.20it/s, loss=0.813, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.798]#015Epoch 0:  66%|██████▌   | 5010/7573 [05:09<02:38, 16.20it/s, loss=0.813, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.798]#015Epoch 0:  66%|██████▌   | 5010/7573 [05:09<02:38, 16.20it/s, loss=0.639, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.892]#015Epoch 0:  67%|██████▋   | 5040/7573 [05:11<02:36, 16.19it/s, loss=0.639, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.892]#015Epoch 0:  67%|██████▋   | 5040/7573 [05:11<02:36, 16.19it/s, loss=0.749, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.904]#015Epoch 0:  67%|██████▋   | 5070/7573 [05:13<02:34, 16.19it/s, loss=0.749, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.904]#015Epoch 0:  67%|██████▋   | 5070/7573 [05:13<02:34, 16.19it/s, loss=0.699, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.849]#015Epoch

[34mnum=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.868]#015Epoch 0:  87%|████████▋ | 6570/7573 [06:48<01:02, 16.10it/s, loss=0.561, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.868]#015Epoch 0:  87%|████████▋ | 6570/7573 [06:48<01:02, 16.10it/s, loss=0.723, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.932]#015Epoch 0:  87%|████████▋ | 6600/7573 [06:50<01:00, 16.10it/s, loss=0.723, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.932]#015Epoch 0:  87%|████████▋ | 6600/7573 [06:50<01:00, 16.10it/s, loss=0.696, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.904]#015Epoch 0:  88%|████████▊ | 6630/7573 [06:51<00:58, 16.10it/s, loss=0.696, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.904]#015Epoch 0:  88%|████████▊ | 6630/7573 [06:51<00:58, 16.10it/s, loss=0.723, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.874]#015Epoch 0:  88%|████████▊ | 6660/7573 [06:53<00:56, 16.10it/s, l

[34m#015Validating:  78%|███████▊  | 360/461 [00:13<00:03, 26.45it/s]#033[A#015Epoch 0:  99%|█████████▉| 7500/7573 [07:35<00:04, 16.47it/s, loss=0.675, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.809][0m
[34m#015Validating:  85%|████████▍ | 390/461 [00:14<00:02, 27.13it/s]#033[A#015Epoch 0:  99%|█████████▉| 7530/7573 [07:36<00:02, 16.49it/s, loss=0.675, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.809][0m
[34m#015Validating:  91%|█████████ | 420/461 [00:15<00:01, 27.32it/s]#033[A#015Epoch 0: 100%|█████████▉| 7560/7573 [07:37<00:00, 16.52it/s, loss=0.675, v_num=0, val_f1_epoch=0.0652, val_loss_epoch=0.668, train_f1=0.809][0m
[34m#015Validating:  98%|█████████▊| 450/461 [00:16<00:00, 27.15it/s]#033[A[0m
[34m#015Validating: 100%|██████████| 461/461 [00:17<00:00, 27.26it/s]#033[A#015Epoch 0: 100%|██████████| 7573/7573 [07:39<00:00, 16.49it/s, loss=0.691, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.859, val_f1_step=0.520, val_l

[34moss_step=0.165]#015Epoch 1:  18%|█▊        | 1350/7573 [01:24<06:30, 15.95it/s, loss=0.681, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.841, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  18%|█▊        | 1350/7573 [01:24<06:30, 15.95it/s, loss=0.623, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.915, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  18%|█▊        | 1380/7573 [01:26<06:27, 15.97it/s, loss=0.623, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.915, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  18%|█▊        | 1380/7573 [01:26<06:27, 15.97it/s, loss=0.566, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.855, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  19%|█▊        | 1410/7573 [01:28<06:25, 15.97it/s, loss=0.566, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.855, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  19%|█▊        | 1410/7573 [01:28<06:25, 15.97it/s, 

[34m#015Epoch 1:  35%|███▌      | 2670/7573 [02:45<05:04, 16.09it/s, loss=0.584, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.886, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  35%|███▌      | 2670/7573 [02:45<05:04, 16.09it/s, loss=0.699, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.892, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  36%|███▌      | 2700/7573 [02:47<05:02, 16.09it/s, loss=0.699, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.892, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  36%|███▌      | 2700/7573 [02:47<05:02, 16.09it/s, loss=0.568, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.909, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  36%|███▌      | 2730/7573 [02:49<05:00, 16.11it/s, loss=0.568, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.909, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  36%|███▌      | 2730/7573 [02:49<05:00, 16.11it/s, loss=0.695, v_n

[34mepoch=0.810, val_loss_epoch=0.114, train_f1=0.897, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  52%|█████▏    | 3960/7573 [04:05<03:43, 16.14it/s, loss=0.544, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.882, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  53%|█████▎    | 3990/7573 [04:07<03:42, 16.14it/s, loss=0.544, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.882, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  53%|█████▎    | 3990/7573 [04:07<03:42, 16.14it/s, loss=0.56, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.910, val_f1_step=0.520, val_loss_step=0.165] #015Epoch 1:  53%|█████▎    | 4020/7573 [04:09<03:40, 16.14it/s, loss=0.56, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.910, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  53%|█████▎    | 4020/7573 [04:09<03:40, 16.14it/s, loss=0.615, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.830, val_f1_step=0.520, val_loss_st

[34m=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.841, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  69%|██████▉   | 5250/7573 [05:24<02:23, 16.19it/s, loss=0.696, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.841, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  69%|██████▉   | 5250/7573 [05:24<02:23, 16.19it/s, loss=0.541, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.892, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  70%|██████▉   | 5280/7573 [05:26<02:21, 16.19it/s, loss=0.541, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.892, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  70%|██████▉   | 5280/7573 [05:26<02:21, 16.19it/s, loss=0.697, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.904, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  70%|███████   | 5310/7573 [05:27<02:19, 16.19it/s, loss=0.697, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.904, val_f1_step=0.520,

[34m:  86%|████████▌ | 6480/7573 [06:40<01:07, 16.17it/s, loss=0.538, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.867, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  86%|████████▌ | 6510/7573 [06:42<01:05, 16.16it/s, loss=0.538, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.867, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  86%|████████▌ | 6510/7573 [06:42<01:05, 16.16it/s, loss=0.563, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.950, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  86%|████████▋ | 6540/7573 [06:44<01:03, 16.15it/s, loss=0.563, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.950, val_f1_step=0.520, val_loss_step=0.165]#015Epoch 1:  86%|████████▋ | 6540/7573 [06:44<01:03, 16.15it/s, loss=0.46, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.938, val_f1_step=0.520, val_loss_step=0.165] #015Epoch 1:  87%|████████▋ | 6570/7573 [06:46<01:02, 16.16it/s, loss=0.46, v_num=0, val_f1

[34m#015Validating:  52%|█████▏    | 240/461 [00:08<00:08, 26.91it/s]#033[A#015Epoch 1:  97%|█████████▋| 7380/7573 [07:29<00:11, 16.42it/s, loss=0.648, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.827, val_f1_step=0.520, val_loss_step=0.165][0m
[34m#015Validating:  59%|█████▊    | 270/461 [00:10<00:07, 26.27it/s]#033[A#015Epoch 1:  98%|█████████▊| 7410/7573 [07:30<00:09, 16.44it/s, loss=0.648, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.827, val_f1_step=0.520, val_loss_step=0.165][0m
[34m#015Validating:  65%|██████▌   | 300/461 [00:11<00:06, 25.83it/s]#033[A#015Epoch 1:  98%|█████████▊| 7440/7573 [07:31<00:08, 16.47it/s, loss=0.648, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, train_f1=0.827, val_f1_step=0.520, val_loss_step=0.165][0m
[34m#015Validating:  72%|███████▏  | 330/461 [00:12<00:05, 25.86it/s]#033[A#015Epoch 1:  99%|█████████▊| 7470/7573 [07:32<00:06, 16.49it/s, loss=0.648, v_num=0, val_f1_epoch=0.810, val_loss_epoch=0.114, trai

[34m1, val_loss_step=0.182]#015Epoch 2:  18%|█▊        | 1350/7573 [01:22<06:21, 16.32it/s, loss=0.488, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.892, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  18%|█▊        | 1350/7573 [01:22<06:21, 16.32it/s, loss=0.505, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.880, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  18%|█▊        | 1380/7573 [01:24<06:19, 16.32it/s, loss=0.505, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.880, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  18%|█▊        | 1380/7573 [01:24<06:19, 16.32it/s, loss=0.682, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.886, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  19%|█▊        | 1410/7573 [01:26<06:17, 16.34it/s, loss=0.682, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.886, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  19%|█▊        | 1410/7573 [01:26<06:17, 16.

[34mp=0.182]#015Epoch 2:  35%|███▌      | 2670/7573 [02:44<05:02, 16.21it/s, loss=0.529, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.910, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  35%|███▌      | 2670/7573 [02:44<05:02, 16.21it/s, loss=0.546, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.897, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  36%|███▌      | 2700/7573 [02:46<05:00, 16.21it/s, loss=0.546, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.897, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  36%|███▌      | 2700/7573 [02:46<05:00, 16.21it/s, loss=0.515, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.758, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  36%|███▌      | 2730/7573 [02:48<04:58, 16.21it/s, loss=0.515, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.758, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  36%|███▌      | 2730/7573 [02:48<04:58, 16.21it/s, loss=0.

[34m_epoch=0.824, val_loss_epoch=0.108, train_f1=0.869, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  52%|█████▏    | 3960/7573 [04:04<03:43, 16.19it/s, loss=0.542, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.916, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  53%|█████▎    | 3990/7573 [04:06<03:41, 16.18it/s, loss=0.542, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.916, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  53%|█████▎    | 3990/7573 [04:06<03:41, 16.18it/s, loss=0.514, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.876, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  53%|█████▎    | 4020/7573 [04:08<03:39, 16.19it/s, loss=0.514, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.876, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  53%|█████▎    | 4020/7573 [04:08<03:39, 16.19it/s, loss=0.51, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.983, val_f1_step=0.521, val_loss_s

[34m, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.932, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  69%|██████▉   | 5250/7573 [05:25<02:24, 16.12it/s, loss=0.464, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.932, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  69%|██████▉   | 5250/7573 [05:25<02:24, 16.12it/s, loss=0.485, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.881, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  70%|██████▉   | 5280/7573 [05:27<02:22, 16.12it/s, loss=0.485, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.881, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  70%|██████▉   | 5280/7573 [05:27<02:22, 16.12it/s, loss=0.489, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.876, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  70%|███████   | 5310/7573 [05:29<02:20, 16.12it/s, loss=0.489, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.876, val_f1_step=0.521, v

[34m]#015Epoch 2:  86%|████████▌ | 6480/7573 [06:43<01:08, 16.05it/s, loss=0.391, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.910, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  86%|████████▌ | 6510/7573 [06:45<01:06, 16.05it/s, loss=0.391, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.910, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  86%|████████▌ | 6510/7573 [06:45<01:06, 16.05it/s, loss=0.436, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.892, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  86%|████████▋ | 6540/7573 [06:47<01:04, 16.05it/s, loss=0.436, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.892, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  86%|████████▋ | 6540/7573 [06:47<01:04, 16.05it/s, loss=0.533, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.800, val_f1_step=0.521, val_loss_step=0.182]#015Epoch 2:  87%|████████▋ | 6570/7573 [06:49<01:02, 16.06it/s, loss=0.533, v_

[34m#015Validating:  20%|█▉        | 90/461 [00:03<00:14, 25.97it/s]#033[A#015Epoch 2:  95%|█████████▌| 7230/7573 [07:25<00:21, 16.21it/s, loss=0.498, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.916, val_f1_step=0.521, val_loss_step=0.182][0m
[34m#015Validating:  26%|██▌       | 120/461 [00:04<00:12, 27.09it/s]#033[A#015Epoch 2:  96%|█████████▌| 7260/7573 [07:26<00:19, 16.24it/s, loss=0.498, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.916, val_f1_step=0.521, val_loss_step=0.182][0m
[34m#015Validating:  33%|███▎      | 150/461 [00:05<00:11, 27.74it/s]#033[A#015Epoch 2:  96%|█████████▋| 7290/7573 [07:27<00:17, 16.27it/s, loss=0.498, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train_f1=0.916, val_f1_step=0.521, val_loss_step=0.182][0m
[34m#015Validating:  39%|███▉      | 180/461 [00:06<00:10, 27.34it/s]#033[A#015Epoch 2:  97%|█████████▋| 7320/7573 [07:29<00:15, 16.30it/s, loss=0.498, v_num=0, val_f1_epoch=0.824, val_loss_epoch=0.108, train

[34m1, train_f1=0.482]#015Epoch 0:  20%|██        | 1710/8408 [01:39<06:31, 17.11it/s, loss=2.82, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.482]#015Epoch 0:  20%|██        | 1710/8408 [01:39<06:31, 17.11it/s, loss=1.96, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.498]#015Epoch 0:  21%|██        | 1740/8408 [01:41<06:29, 17.12it/s, loss=1.96, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.498]#015Epoch 0:  21%|██        | 1740/8408 [01:41<06:29, 17.11it/s, loss=2.1, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.482] #015Epoch 0:  21%|██        | 1770/8408 [01:43<06:28, 17.10it/s, loss=2.1, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.482]#015Epoch 0:  21%|██        | 1770/8408 [01:43<06:28, 17.10it/s, loss=2.91, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.485]#015Epoch 0:  21%|██▏       | 1800/8408 [01:45<06:26, 17.11it/s, loss=2.91, v_num=0, val_f1_epoch=0.0428, val_loss_epoch

[34mum=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.480] #015Epoch 0:  40%|████      | 3390/8408 [03:18<04:53, 17.10it/s, loss=2.7, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.480]#015Epoch 0:  40%|████      | 3390/8408 [03:18<04:53, 17.10it/s, loss=2.33, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483]#015Epoch 0:  41%|████      | 3420/8408 [03:20<04:51, 17.09it/s, loss=2.33, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483]#015Epoch 0:  41%|████      | 3420/8408 [03:20<04:51, 17.09it/s, loss=2.03, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483]#015Epoch 0:  41%|████      | 3450/8408 [03:21<04:50, 17.09it/s, loss=2.03, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483]#015Epoch 0:  41%|████      | 3450/8408 [03:21<04:50, 17.09it/s, loss=2.8, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.485] #015Epoch 0:  41%|████▏     | 3480/8408 [03:23<04:48, 17.09it/s, loss=2.8

[34m0/8408 [04:53<03:19, 17.05it/s, loss=1.84, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.484]#015Epoch 0:  60%|█████▉    | 5010/8408 [04:53<03:19, 17.05it/s, loss=2.12, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483]#015Epoch 0:  60%|█████▉    | 5040/8408 [04:55<03:17, 17.05it/s, loss=2.12, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483]#015Epoch 0:  60%|█████▉    | 5040/8408 [04:55<03:17, 17.05it/s, loss=2.5, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.480] #015Epoch 0:  60%|██████    | 5070/8408 [04:57<03:15, 17.05it/s, loss=2.5, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.480]#015Epoch 0:  60%|██████    | 5070/8408 [04:57<03:15, 17.05it/s, loss=2.06, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.481]#015Epoch 0:  61%|██████    | 5100/8408 [04:59<03:14, 17.04it/s, loss=2.06, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.481]#015Epoch 0:  61%|██████    

[34m0/8408 [06:27<01:46, 17.04it/s, loss=2.19, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.481]#015Epoch 0:  78%|███████▊  | 6600/8408 [06:27<01:46, 17.04it/s, loss=2.17, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.498]#015Epoch 0:  79%|███████▉  | 6630/8408 [06:29<01:44, 17.04it/s, loss=2.17, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.498]#015Epoch 0:  79%|███████▉  | 6630/8408 [06:29<01:44, 17.04it/s, loss=2.4, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.480] #015Epoch 0:  79%|███████▉  | 6660/8408 [06:30<01:42, 17.04it/s, loss=2.4, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.480]#015Epoch 0:  79%|███████▉  | 6660/8408 [06:30<01:42, 17.04it/s, loss=2.21, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.480]#015Epoch 0:  80%|███████▉  | 6690/8408 [06:32<01:40, 17.04it/s, loss=2.21, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.480]#015Epoch 0:  80%|███████▉  

[34m#015Validating:  39%|███▉      | 240/619 [00:12<00:20, 18.70it/s]#033[A#015Epoch 0:  96%|█████████▌| 8040/8408 [07:50<00:21, 17.08it/s, loss=2.21, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483][0m
[34m#015Validating:  44%|████▎     | 270/619 [00:14<00:18, 18.39it/s]#033[A#015Epoch 0:  96%|█████████▌| 8070/8408 [07:52<00:19, 17.08it/s, loss=2.21, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483][0m
[34m#015Validating:  48%|████▊     | 300/619 [00:16<00:17, 17.88it/s]#033[A#015Epoch 0:  96%|█████████▋| 8100/8408 [07:54<00:18, 17.08it/s, loss=2.21, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483][0m
[34m#015Validating:  53%|█████▎    | 330/619 [00:18<00:16, 17.55it/s]#033[A#015Epoch 0:  97%|█████████▋| 8130/8408 [07:56<00:16, 17.08it/s, loss=2.21, v_num=0, val_f1_epoch=0.0428, val_loss_epoch=0.701, train_f1=0.483][0m
[34m#015Validating:  58%|█████▊    | 360/619 [00:19<00:15, 17.19it/s]#033[A#015Epoch 0:  97%|█████████▋

[34mep=0.0689]#015Epoch 1:  16%|█▌        | 1350/8408 [01:19<06:57, 16.92it/s, loss=2.19, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  16%|█▌        | 1350/8408 [01:19<06:57, 16.92it/s, loss=1.81, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  16%|█▋        | 1380/8408 [01:21<06:55, 16.90it/s, loss=1.81, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  16%|█▋        | 1380/8408 [01:21<06:55, 16.90it/s, loss=1.61, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  17%|█▋        | 1410/8408 [01:23<06:53, 16.91it/s, loss=1.61, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  17%|█▋        | 1410/8408 [01:23<06:53, 16.91it/s, loss=

[34ms, loss=2.09, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.485, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  32%|███▏      | 2670/8408 [02:37<05:38, 16.94it/s, loss=2.62, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  32%|███▏      | 2700/8408 [02:39<05:36, 16.94it/s, loss=2.62, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  32%|███▏      | 2700/8408 [02:39<05:36, 16.94it/s, loss=1.99, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.485, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  32%|███▏      | 2730/8408 [02:41<05:35, 16.93it/s, loss=1.99, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.485, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  32%|███▏      | 2730/8408 [02:41<05:35, 16.93it/s, loss=2.36, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.485,

[34m  | 3960/8408 [03:53<04:22, 16.95it/s, loss=2.25, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.480, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  47%|████▋     | 3990/8408 [03:55<04:20, 16.94it/s, loss=2.25, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.480, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  47%|████▋     | 3990/8408 [03:55<04:20, 16.94it/s, loss=2.37, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.485, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  48%|████▊     | 4020/8408 [03:57<04:19, 16.94it/s, loss=2.37, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.485, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  48%|████▊     | 4020/8408 [03:57<04:19, 16.94it/s, loss=2.23, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  48%|████▊     | 4050/8408 [03:58<04:17, 16.95it/s, loss=2.23, v_num=0, val_f1_epoch=0.485, v

[34m193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  62%|██████▏   | 5250/8408 [05:09<03:05, 16.99it/s, loss=2.87, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  63%|██████▎   | 5280/8408 [05:10<03:04, 16.98it/s, loss=2.87, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  63%|██████▎   | 5280/8408 [05:10<03:04, 16.98it/s, loss=2.11, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.479, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  63%|██████▎   | 5310/8408 [05:12<03:02, 16.99it/s, loss=2.11, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.479, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  63%|██████▎   | 5310/8408 [05:12<03:02, 16.99it/s, loss=2.28, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  64%|█

[34m_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  77%|███████▋  | 6510/8408 [06:23<01:51, 16.98it/s, loss=1.84, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  78%|███████▊  | 6540/8408 [06:25<01:49, 16.98it/s, loss=1.84, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  78%|███████▊  | 6540/8408 [06:25<01:49, 16.98it/s, loss=2.01, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  78%|███████▊  | 6570/8408 [06:26<01:48, 16.98it/s, loss=2.01, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  78%|███████▊  | 6570/8408 [06:26<01:48, 16.98it/s, loss=2.42, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0689]#015Epoch 1:  78%|███████▊  | 6600/8408 [06

[34m#015Validating:  15%|█▍        | 90/619 [00:05<00:29, 17.88it/s]#033[A#015Epoch 1:  94%|█████████▍| 7890/8408 [07:43<00:30, 17.02it/s, loss=2.64, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689][0m
[34m#015Validating:  19%|█▉        | 120/619 [00:06<00:27, 18.12it/s]#033[A#015Epoch 1:  94%|█████████▍| 7920/8408 [07:45<00:28, 17.03it/s, loss=2.64, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689][0m
[34m#015Validating:  24%|██▍       | 150/619 [00:08<00:25, 18.23it/s]#033[A#015Epoch 1:  95%|█████████▍| 7950/8408 [07:46<00:26, 17.03it/s, loss=2.64, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0689][0m
[34m#015Validating:  29%|██▉       | 180/619 [00:09<00:23, 18.59it/s]#033[A#015Epoch 1:  95%|█████████▍| 7980/8408 [07:48<00:25, 17.04it/s, loss=2.64, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_

[34m0.0702]#015Epoch 2:  16%|█▌        | 1350/8408 [01:19<06:53, 17.06it/s, loss=2.22, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  16%|█▌        | 1350/8408 [01:19<06:53, 17.06it/s, loss=1.97, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.484, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  16%|█▋        | 1380/8408 [01:20<06:51, 17.06it/s, loss=1.97, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.484, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  16%|█▋        | 1380/8408 [01:20<06:51, 17.06it/s, loss=2.11, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  17%|█▋        | 1410/8408 [01:22<06:49, 17.09it/s, loss=2.11, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  17%|█▋        | 1410/8408 [01:22<06:49, 17.09it/s, loss=2.4

[34moss=1.73, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  32%|███▏      | 2670/8408 [02:36<05:36, 17.03it/s, loss=1.52, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.484, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  32%|███▏      | 2700/8408 [02:38<05:35, 17.02it/s, loss=1.52, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.484, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  32%|███▏      | 2700/8408 [02:38<05:35, 17.02it/s, loss=3.09, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  32%|███▏      | 2730/8408 [02:40<05:33, 17.01it/s, loss=3.09, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  32%|███▏      | 2730/8408 [02:40<05:33, 17.01it/s, loss=2.45, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.479, val

[34m | 3960/8408 [03:53<04:21, 16.98it/s, loss=2.19, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  47%|████▋     | 3990/8408 [03:54<04:20, 16.98it/s, loss=2.19, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  47%|████▋     | 3990/8408 [03:54<04:20, 16.98it/s, loss=2.21, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  48%|████▊     | 4020/8408 [03:56<04:18, 16.99it/s, loss=2.21, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  48%|████▊     | 4020/8408 [03:56<04:18, 16.99it/s, loss=2.14, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.485, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  48%|████▊     | 4050/8408 [03:58<04:16, 16.98it/s, loss=2.14, v_num=0, val_f1_epoch=0.485, va

[34m3, train_f1=0.484, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  62%|██████▏   | 5250/8408 [05:09<03:05, 16.98it/s, loss=2.05, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  63%|██████▎   | 5280/8408 [05:11<03:04, 16.97it/s, loss=2.05, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  63%|██████▎   | 5280/8408 [05:11<03:04, 16.97it/s, loss=2.66, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  63%|██████▎   | 5310/8408 [05:12<03:02, 16.98it/s, loss=2.66, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.482, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  63%|██████▎   | 5310/8408 [05:12<03:02, 16.98it/s, loss=2.01, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.484, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  64%|███

[34mf1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  77%|███████▋  | 6510/8408 [06:23<01:51, 16.99it/s, loss=2.13, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.478, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  78%|███████▊  | 6540/8408 [06:24<01:49, 16.99it/s, loss=2.13, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.478, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  78%|███████▊  | 6540/8408 [06:24<01:49, 16.99it/s, loss=2.42, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  78%|███████▊  | 6570/8408 [06:26<01:48, 16.99it/s, loss=2.42, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.481, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  78%|███████▊  | 6570/8408 [06:26<01:48, 16.99it/s, loss=2.41, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.485, val_f1_step=0.500, val_loss_step=0.0702]#015Epoch 2:  78%|███████▊  | 6600/8408 [06:

[34m#015Validating:   0%|          | 0/619 [00:00<?, ?it/s]#033[A[0m
[34m#015Validating:   5%|▍         | 30/619 [00:01<00:33, 17.38it/s]#033[A#015Epoch 2:  93%|█████████▎| 7830/8408 [07:40<00:33, 17.00it/s, loss=2.07, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0702][0m
[34m#015Validating:  10%|▉         | 60/619 [00:03<00:31, 17.54it/s]#033[A#015Epoch 2:  93%|█████████▎| 7860/8408 [07:42<00:32, 17.01it/s, loss=2.07, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0702][0m
[34m#015Validating:  15%|█▍        | 90/619 [00:05<00:29, 17.64it/s]#033[A#015Epoch 2:  94%|█████████▍| 7890/8408 [07:43<00:30, 17.01it/s, loss=2.07, v_num=0, val_f1_epoch=0.485, val_loss_epoch=0.193, train_f1=0.483, val_f1_step=0.500, val_loss_step=0.0702][0m
[34m#015Validating:  19%|█▉        | 120/619 [00:06<00:27, 17.91it/s]#033[A#015Epoch 2:  94%|█████████▍| 7920/8408 [07:45<00:28, 17.01it/s,

[34mtrain_f1=0.654]#015Epoch 0:  20%|██        | 1710/8428 [01:40<06:33, 17.08it/s, loss=1.83, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.654]#015Epoch 0:  20%|██        | 1710/8428 [01:40<06:33, 17.08it/s, loss=1.88, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.686]#015Epoch 0:  21%|██        | 1740/8428 [01:41<06:31, 17.08it/s, loss=1.88, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.686]#015Epoch 0:  21%|██        | 1740/8428 [01:41<06:31, 17.08it/s, loss=1.66, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.713]#015Epoch 0:  21%|██        | 1770/8428 [01:43<06:30, 17.07it/s, loss=1.66, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.713]#015Epoch 0:  21%|██        | 1770/8428 [01:43<06:30, 17.07it/s, loss=1.99, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.745]#015Epoch 0:  21%|██▏       | 1800/8428 [01:45<06:28, 17.07it/s, loss=1.99, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0

[34mnum=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.686]#015Epoch 0:  40%|████      | 3390/8428 [03:18<04:54, 17.09it/s, loss=1.69, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.686]#015Epoch 0:  40%|████      | 3390/8428 [03:18<04:54, 17.09it/s, loss=1.43, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.730]#015Epoch 0:  41%|████      | 3420/8428 [03:20<04:53, 17.09it/s, loss=1.43, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.730]#015Epoch 0:  41%|████      | 3420/8428 [03:20<04:53, 17.09it/s, loss=1.55, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.779]#015Epoch 0:  41%|████      | 3450/8428 [03:21<04:51, 17.09it/s, loss=1.55, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.779]#015Epoch 0:  41%|████      | 3450/8428 [03:21<04:51, 17.09it/s, loss=1.19, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.667]#015Epoch 0:  41%|████▏     | 3480/8428 [03:23<04:49, 17.09it/s, loss=1.

[34m8428 [04:53<03:20, 17.07it/s, loss=1.22, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.773]#015Epoch 0:  59%|█████▉    | 5010/8428 [04:53<03:20, 17.07it/s, loss=1.75, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.762]#015Epoch 0:  60%|█████▉    | 5040/8428 [04:55<03:18, 17.08it/s, loss=1.75, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.762]#015Epoch 0:  60%|█████▉    | 5040/8428 [04:55<03:18, 17.08it/s, loss=1.33, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.697]#015Epoch 0:  60%|██████    | 5070/8428 [04:56<03:16, 17.08it/s, loss=1.33, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.697]#015Epoch 0:  60%|██████    | 5070/8428 [04:56<03:16, 17.08it/s, loss=1.73, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.677]#015Epoch 0:  61%|██████    | 5100/8428 [04:58<03:14, 17.07it/s, loss=1.73, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.677]#015Epoch 0:  61%|██████    |

[34m01:47, 17.07it/s, loss=1.22, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.735]#015Epoch 0:  78%|███████▊  | 6600/8428 [06:26<01:47, 17.07it/s, loss=1.69, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.747]#015Epoch 0:  79%|███████▊  | 6630/8428 [06:28<01:45, 17.08it/s, loss=1.69, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.747]#015Epoch 0:  79%|███████▊  | 6630/8428 [06:28<01:45, 17.08it/s, loss=1.19, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.707]#015Epoch 0:  79%|███████▉  | 6660/8428 [06:30<01:43, 17.08it/s, loss=1.19, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.707]#015Epoch 0:  79%|███████▉  | 6660/8428 [06:30<01:43, 17.08it/s, loss=1.67, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.672]#015Epoch 0:  79%|███████▉  | 6690/8428 [06:31<01:41, 17.08it/s, loss=1.67, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.672]#015Epoch 0:  79%|███████▉  | 6690/8428 [

[34m#015Validating:  10%|█         | 60/580 [00:03<00:29, 17.37it/s]#033[A#015Epoch 0:  94%|█████████▍| 7920/8428 [07:44<00:29, 17.06it/s, loss=1.64, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.776][0m
[34m#015Validating:  16%|█▌        | 90/580 [00:05<00:28, 17.40it/s]#033[A#015Epoch 0:  94%|█████████▍| 7950/8428 [07:46<00:28, 17.06it/s, loss=1.64, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.776][0m
[34m#015Validating:  21%|██        | 120/580 [00:06<00:26, 17.52it/s]#033[A#015Epoch 0:  95%|█████████▍| 7980/8428 [07:47<00:26, 17.06it/s, loss=1.64, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.776][0m
[34m#015Validating:  26%|██▌       | 150/580 [00:08<00:23, 18.00it/s]#033[A#015Epoch 0:  95%|█████████▌| 8010/8428 [07:49<00:24, 17.07it/s, loss=1.64, v_num=0, val_f1_epoch=0.0902, val_loss_epoch=0.700, train_f1=0.776][0m
[34m#015Validating:  31%|███       | 180/580 [00:09<00:21, 18.29it/s]#033[A#015Epoch 0:  95%|█████████▌| 

[34ms=1.11, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.700, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  16%|█▌        | 1350/8428 [01:20<06:59, 16.85it/s, loss=0.996, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.840, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  16%|█▋        | 1380/8428 [01:21<06:58, 16.83it/s, loss=0.996, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.840, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  16%|█▋        | 1380/8428 [01:21<06:58, 16.83it/s, loss=1.09, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.752, val_f1_step=0.467, val_loss_step=0.344] #015Epoch 1:  17%|█▋        | 1410/8428 [01:23<06:57, 16.81it/s, loss=1.09, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.752, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  17%|█▋        | 1410/8428 [01:23<06:57, 16.81it/s, loss=1.33, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.750, val_f1_

[34m�      | 2670/8428 [02:37<05:40, 16.91it/s, loss=1.04, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.734, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  32%|███▏      | 2700/8428 [02:39<05:38, 16.92it/s, loss=1.04, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.734, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  32%|███▏      | 2700/8428 [02:39<05:38, 16.92it/s, loss=1.44, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.726, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  32%|███▏      | 2730/8428 [02:41<05:36, 16.91it/s, loss=1.44, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.726, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  32%|███▏      | 2730/8428 [02:41<05:36, 16.91it/s, loss=1.05, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.818, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  33%|███▎      | 2760/8428 [02:43<05:35, 16.91it/s, loss=1.05, v_num=0, val_f1_epoch=0.686, v

[34m, 16.94it/s, loss=1.16, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.669, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  47%|████▋     | 3990/8428 [03:55<04:22, 16.94it/s, loss=0.959, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.846, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  48%|████▊     | 4020/8428 [03:57<04:20, 16.93it/s, loss=0.959, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.846, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  48%|████▊     | 4020/8428 [03:57<04:20, 16.93it/s, loss=1.25, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.745, val_f1_step=0.467, val_loss_step=0.344] #015Epoch 1:  48%|████▊     | 4050/8428 [03:59<04:18, 16.94it/s, loss=1.25, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.745, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  48%|████▊     | 4050/8428 [03:59<04:18, 16.94it/s, loss=1.18, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f

[34m  | 5280/8428 [05:11<03:05, 16.94it/s, loss=1.44, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.796, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  63%|██████▎   | 5280/8428 [05:11<03:05, 16.94it/s, loss=1.34, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.763, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  63%|██████▎   | 5310/8428 [05:13<03:04, 16.93it/s, loss=1.34, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.763, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  63%|██████▎   | 5310/8428 [05:13<03:04, 16.93it/s, loss=1.05, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.701, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  63%|██████▎   | 5340/8428 [05:15<03:02, 16.93it/s, loss=1.05, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.701, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  63%|██████▎   | 5340/8428 [05:15<03:02, 16.93it/s, loss=1.66, v_num=0, val_f1_epoch=0.686, val_lo

[34ml_loss_epoch=0.245, train_f1=0.709, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  78%|███████▊  | 6540/8428 [06:26<01:51, 16.93it/s, loss=1.01, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.766, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  78%|███████▊  | 6570/8428 [06:28<01:49, 16.93it/s, loss=1.01, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.766, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  78%|███████▊  | 6570/8428 [06:28<01:49, 16.93it/s, loss=1.21, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.757, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  78%|███████▊  | 6600/8428 [06:29<01:47, 16.94it/s, loss=1.21, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.757, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  78%|███████▊  | 6600/8428 [06:29<01:47, 16.94it/s, loss=0.816, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.758, val_f1_step=0.467, val_loss_step=0.344]#015Epoch

[34mal_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  93%|█████████▎| 7800/8428 [07:40<00:37, 16.94it/s, loss=0.988, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.752, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  93%|█████████▎| 7800/8428 [07:40<00:37, 16.94it/s, loss=1.02, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.788, val_f1_step=0.467, val_loss_step=0.344] #015Epoch 1:  93%|█████████▎| 7830/8428 [07:42<00:35, 16.94it/s, loss=1.02, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.788, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  93%|█████████▎| 7830/8428 [07:42<00:35, 16.94it/s, loss=0.923, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.745, val_f1_step=0.467, val_loss_step=0.344]#015Epoch 1:  93%|█████████▎| 7860/8428 [07:43<00:33, 16.95it/s, loss=0.923, v_num=0, val_f1_epoch=0.686, val_loss_epoch=0.245, train_f1=0.745, val_f1_step=0.467, val_loss_step=0.344][0m
[34m#015Validating: 0it [00:00, ?it/s

[34m/8428 [01:19<06:57, 16.97it/s, loss=1.01, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.804, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  16%|█▌        | 1350/8428 [01:19<06:57, 16.97it/s, loss=0.923, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.781, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  16%|█▋        | 1380/8428 [01:21<06:55, 16.96it/s, loss=0.923, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.781, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  16%|█▋        | 1380/8428 [01:21<06:55, 16.96it/s, loss=1.4, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.800, val_f1_step=0.467, val_loss_step=0.446]  #015Epoch 2:  17%|█▋        | 1410/8428 [01:23<06:54, 16.95it/s, loss=1.4, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.800, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  17%|█▋        | 1410/8428 [01:23<06:54, 16.95it/s, loss=1.03, v_num=0, val_f1_epoch=0.704, val_loss_epo

[34m467, val_loss_step=0.446]#015Epoch 2:  32%|███▏      | 2670/8428 [02:37<05:39, 16.98it/s, loss=1.21, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.775, val_f1_step=0.467, val_loss_step=0.446] #015Epoch 2:  32%|███▏      | 2700/8428 [02:38<05:37, 16.99it/s, loss=1.21, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.775, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  32%|███▏      | 2700/8428 [02:38<05:37, 16.99it/s, loss=1.18, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.744, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  32%|███▏      | 2730/8428 [02:40<05:35, 16.99it/s, loss=1.18, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.744, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  32%|███▏      | 2730/8428 [02:40<05:35, 16.99it/s, loss=1.24, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.772, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  33%|███▎      | 2760/8428 [02:42<05:33, 16.99

[34mal_loss_step=0.446] #015Epoch 2:  47%|████▋     | 3990/8428 [03:55<04:21, 16.98it/s, loss=1.06, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.833, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  47%|████▋     | 3990/8428 [03:55<04:21, 16.98it/s, loss=1.17, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.807, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  48%|████▊     | 4020/8428 [03:56<04:19, 16.98it/s, loss=1.17, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.807, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  48%|████▊     | 4020/8428 [03:56<04:19, 16.98it/s, loss=0.993, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.792, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  48%|████▊     | 4050/8428 [03:58<04:17, 16.99it/s, loss=0.993, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.792, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  48%|████▊     | 4050/8428 [03:58<04:17, 16.99it/s

[34mtrain_f1=0.808, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  63%|██████▎   | 5280/8428 [05:11<03:05, 16.97it/s, loss=0.915, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.808, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  63%|██████▎   | 5280/8428 [05:11<03:05, 16.97it/s, loss=0.92, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.757, val_f1_step=0.467, val_loss_step=0.446] #015Epoch 2:  63%|██████▎   | 5310/8428 [05:12<03:03, 16.97it/s, loss=0.92, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.757, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  63%|██████▎   | 5310/8428 [05:12<03:03, 16.97it/s, loss=0.706, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.797, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  63%|██████▎   | 5340/8428 [05:14<03:01, 16.97it/s, loss=0.706, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.797, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  63%|██████▎ 

[34mpoch 2:  78%|███████▊  | 6540/8428 [06:25<01:51, 16.97it/s, loss=0.765, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.886, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  78%|███████▊  | 6540/8428 [06:25<01:51, 16.97it/s, loss=1.26, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.803, val_f1_step=0.467, val_loss_step=0.446] #015Epoch 2:  78%|███████▊  | 6570/8428 [06:27<01:49, 16.97it/s, loss=1.26, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.803, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  78%|███████▊  | 6570/8428 [06:27<01:49, 16.97it/s, loss=0.88, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.826, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  78%|███████▊  | 6600/8428 [06:28<01:47, 16.97it/s, loss=0.88, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.826, val_f1_step=0.467, val_loss_step=0.446]#015Epoch 2:  78%|███████▊  | 6600/8428 [06:28<01:47, 16.97it/s, loss=1.13, v_num=0, val

[34m#015Validating:  10%|█         | 60/580 [00:03<00:30, 17.23it/s]#033[A#015Epoch 2:  94%|█████████▍| 7920/8428 [07:46<00:29, 16.98it/s, loss=1.5, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.805, val_f1_step=0.467, val_loss_step=0.446][0m
[34m#015Validating:  16%|█▌        | 90/580 [00:05<00:28, 17.34it/s]#033[A#015Epoch 2:  94%|█████████▍| 7950/8428 [07:48<00:28, 16.98it/s, loss=1.5, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.805, val_f1_step=0.467, val_loss_step=0.446][0m
[34m#015Validating:  21%|██        | 120/580 [00:06<00:26, 17.43it/s]#033[A#015Epoch 2:  95%|█████████▍| 7980/8428 [07:49<00:26, 16.99it/s, loss=1.5, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.805, val_f1_step=0.467, val_loss_step=0.446][0m
[34m#015Validating:  26%|██▌       | 150/580 [00:08<00:23, 17.94it/s]#033[A#015Epoch 2:  95%|█████████▌| 8010/8428 [07:51<00:24, 16.99it/s, loss=1.5, v_num=0, val_f1_epoch=0.704, val_loss_epoch=0.229, train_f1=0.805

[34m 0:  17%|█▋        | 1710/10238 [01:41<08:24, 16.91it/s, loss=1.93, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.531]#015Epoch 0:  17%|█▋        | 1710/10238 [01:41<08:24, 16.91it/s, loss=2.19, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.549]#015Epoch 0:  17%|█▋        | 1740/10238 [01:42<08:22, 16.92it/s, loss=2.19, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.549]#015Epoch 0:  17%|█▋        | 1740/10238 [01:42<08:22, 16.92it/s, loss=1.85, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.550]#015Epoch 0:  17%|█▋        | 1770/10238 [01:44<08:20, 16.92it/s, loss=1.85, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.550]#015Epoch 0:  17%|█▋        | 1770/10238 [01:44<08:20, 16.92it/s, loss=1.86, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.503]#015Epoch 0:  18%|█▊        | 1800/10238 [01:46<08:18, 16.91it/s, loss=1.86, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.503]#01

[34m, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.518]#015Epoch 0:  33%|███▎      | 3390/10238 [03:20<06:44, 16.95it/s, loss=1.75, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.505]#015Epoch 0:  33%|███▎      | 3420/10238 [03:21<06:42, 16.94it/s, loss=1.75, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.505]#015Epoch 0:  33%|███▎      | 3420/10238 [03:21<06:42, 16.94it/s, loss=1.85, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.484]#015Epoch 0:  34%|███▎      | 3450/10238 [03:23<06:40, 16.94it/s, loss=1.85, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.484]#015Epoch 0:  34%|███▎      | 3450/10238 [03:23<06:40, 16.94it/s, loss=2.17, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.530]#015Epoch 0:  34%|███▍      | 3480/10238 [03:25<06:38, 16.95it/s, loss=2.17, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.530]#015Epoch 0:  34%|███▍      | 3480/10238 [03:25<06:38, 16.95it/s, los

[34m33, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.531]#015Epoch 0:  49%|████▉     | 5040/10238 [04:57<05:06, 16.95it/s, loss=2.08, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.531]#015Epoch 0:  50%|████▉     | 5070/10238 [04:59<05:04, 16.94it/s, loss=2.08, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.531]#015Epoch 0:  50%|████▉     | 5070/10238 [04:59<05:04, 16.94it/s, loss=1.71, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.546]#015Epoch 0:  50%|████▉     | 5100/10238 [05:01<05:03, 16.94it/s, loss=1.71, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.546]#015Epoch 0:  50%|████▉     | 5100/10238 [05:01<05:03, 16.94it/s, loss=1.81, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.532]#015Epoch 0:  50%|█████     | 5130/10238 [05:02<05:01, 16.93it/s, loss=1.81, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.532]#015Epoch 0:  50%|█████     | 5130/10238 [05:02<05:01, 16.93it/s, l

[34m█▌   | 6660/10238 [06:32<03:31, 16.95it/s, loss=1.67, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.503]#015Epoch 0:  65%|██████▌   | 6660/10238 [06:32<03:31, 16.95it/s, loss=1.71, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.527]#015Epoch 0:  65%|██████▌   | 6690/10238 [06:34<03:29, 16.95it/s, loss=1.71, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.527]#015Epoch 0:  65%|██████▌   | 6690/10238 [06:34<03:29, 16.95it/s, loss=1.68, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.518]#015Epoch 0:  66%|██████▌   | 6720/10238 [06:36<03:27, 16.95it/s, loss=1.68, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.518]#015Epoch 0:  66%|██████▌   | 6720/10238 [06:36<03:27, 16.95it/s, loss=2.02, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.555]#015Epoch 0:  66%|██████▌   | 6750/10238 [06:38<03:25, 16.95it/s, loss=2.02, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.555]#015Epoch 0:  66%

[34mpoch=0.728, train_f1=0.523]#015Epoch 0:  81%|████████  | 8250/10238 [08:06<01:57, 16.97it/s, loss=1.64, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.523]#015Epoch 0:  81%|████████  | 8250/10238 [08:06<01:57, 16.97it/s, loss=2.26, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.555]#015Epoch 0:  81%|████████  | 8280/10238 [08:07<01:55, 16.97it/s, loss=2.26, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.555]#015Epoch 0:  81%|████████  | 8280/10238 [08:07<01:55, 16.97it/s, loss=1.65, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.535]#015Epoch 0:  81%|████████  | 8310/10238 [08:09<01:53, 16.97it/s, loss=1.65, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.535]#015Epoch 0:  81%|████████  | 8310/10238 [08:09<01:53, 16.97it/s, loss=1.79, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.480]#015Epoch 0:  81%|████████▏ | 8340/10238 [08:11<01:51, 16.97it/s, loss=1.79, v_num=0, val_f1_epoch=0.072, val_

[34m#015Validating:  11%|█         | 60/545 [00:03<00:27, 17.96it/s]#033[A#015Epoch 0:  96%|█████████▌| 9780/10238 [09:35<00:26, 17.01it/s, loss=1.73, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.536][0m
[34m#015Validating:  17%|█▋        | 90/545 [00:05<00:25, 17.88it/s]#033[A#015Epoch 0:  96%|█████████▌| 9810/10238 [09:36<00:25, 17.01it/s, loss=1.73, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.536][0m
[34m#015Validating:  22%|██▏       | 120/545 [00:06<00:23, 17.99it/s]#033[A#015Epoch 0:  96%|█████████▌| 9840/10238 [09:38<00:23, 17.01it/s, loss=1.73, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.536][0m
[34m#015Validating:  28%|██▊       | 150/545 [00:08<00:21, 18.53it/s]#033[A#015Epoch 0:  96%|█████████▋| 9870/10238 [09:39<00:21, 17.02it/s, loss=1.73, v_num=0, val_f1_epoch=0.072, val_loss_epoch=0.728, train_f1=0.536][0m
[34m#015Validating:  33%|███▎      | 180/545 [00:09<00:19, 18.71it/s]#033[A#015Epoch 0:  97%|█████████▋| 

[34m_epoch=0.159, train_f1=0.575, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  13%|█▎        | 1350/10238 [01:19<08:44, 16.96it/s, loss=1.83, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.575, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  13%|█▎        | 1350/10238 [01:19<08:44, 16.96it/s, loss=1.44, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.526, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  13%|█▎        | 1380/10238 [01:21<08:42, 16.95it/s, loss=1.44, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.526, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  13%|█▎        | 1380/10238 [01:21<08:42, 16.95it/s, loss=1.63, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.493, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  14%|█▍        | 1410/10238 [01:23<08:41, 16.93it/s, loss=1.63, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.493, val_f1_step=0.493, val_loss_step=0.0857]#015E

[34m93, val_loss_step=0.0857]#015Epoch 1:  26%|██▌       | 2670/10238 [02:37<07:26, 16.95it/s, loss=1.44, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.530, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  26%|██▌       | 2670/10238 [02:37<07:26, 16.95it/s, loss=1.27, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.503, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  26%|██▋       | 2700/10238 [02:39<07:24, 16.95it/s, loss=1.27, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.503, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  26%|██▋       | 2700/10238 [02:39<07:24, 16.95it/s, loss=1.52, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.539, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  27%|██▋       | 2730/10238 [02:41<07:23, 16.95it/s, loss=1.52, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.539, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  27%|██▋       | 2730/10238 [02:41<07

[34mtep=0.0857]#015Epoch 1:  39%|███▊      | 3960/10238 [03:53<06:10, 16.93it/s, loss=1.73, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.578, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  39%|███▉      | 3990/10238 [03:55<06:09, 16.93it/s, loss=1.73, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.578, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  39%|███▉      | 3990/10238 [03:55<06:09, 16.93it/s, loss=1.73, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.573, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  39%|███▉      | 4020/10238 [03:57<06:07, 16.93it/s, loss=1.73, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.573, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  39%|███▉      | 4020/10238 [03:57<06:07, 16.93it/s, loss=1.4, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.573, val_f1_step=0.493, val_loss_step=0.0857] #015Epoch 1:  40%|███▉      | 4050/10238 [03:59<06:05, 16.94it/s

[34mss_step=0.0857]#015Epoch 1:  51%|█████▏    | 5250/10238 [05:10<04:54, 16.93it/s, loss=1.27, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.585, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  52%|█████▏    | 5280/10238 [05:11<04:52, 16.93it/s, loss=1.27, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.585, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  52%|█████▏    | 5280/10238 [05:11<04:52, 16.93it/s, loss=1.57, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.577, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  52%|█████▏    | 5310/10238 [05:13<04:51, 16.93it/s, loss=1.57, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.577, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  52%|█████▏    | 5310/10238 [05:13<04:51, 16.93it/s, loss=1.69, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.566, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  52%|█████▏    | 5340/10238 [05:15<04:49, 16.93

[34m=0.159, train_f1=0.500, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  64%|██████▍   | 6540/10238 [06:26<03:38, 16.92it/s, loss=1.43, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.500, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  64%|██████▍   | 6540/10238 [06:26<03:38, 16.92it/s, loss=1.79, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.532, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  64%|██████▍   | 6570/10238 [06:28<03:36, 16.92it/s, loss=1.79, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.532, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  64%|██████▍   | 6570/10238 [06:28<03:36, 16.92it/s, loss=1.34, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.581, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  64%|██████▍   | 6600/10238 [06:29<03:34, 16.92it/s, loss=1.34, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.581, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1

[34ms, loss=1.35, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.510, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  76%|███████▌  | 7800/10238 [07:40<02:24, 16.92it/s, loss=1.35, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.510, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  76%|███████▌  | 7800/10238 [07:40<02:24, 16.92it/s, loss=1.44, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.573, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  76%|███████▋  | 7830/10238 [07:42<02:22, 16.92it/s, loss=1.44, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.573, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  76%|███████▋  | 7830/10238 [07:42<02:22, 16.92it/s, loss=1.2, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.541, val_f1_step=0.493, val_loss_step=0.0857] #015Epoch 1:  77%|███████▋  | 7860/10238 [07:44<02:20, 16.92it/s, loss=1.2, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.

[34m�██████▊ | 9030/10238 [08:53<01:11, 16.92it/s, loss=2.03, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.585, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  88%|████████▊ | 9030/10238 [08:53<01:11, 16.92it/s, loss=1.39, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.513, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  88%|████████▊ | 9060/10238 [08:55<01:09, 16.92it/s, loss=1.39, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.513, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  88%|████████▊ | 9060/10238 [08:55<01:09, 16.92it/s, loss=2, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.519, val_f1_step=0.493, val_loss_step=0.0857]   #015Epoch 1:  89%|████████▉ | 9090/10238 [08:57<01:07, 16.92it/s, loss=2, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.519, val_f1_step=0.493, val_loss_step=0.0857]#015Epoch 1:  89%|████████▉ | 9090/10238 [08:57<01:07, 16.92it/s, loss=1.65, v_num=0, val_f1_epoc

[34m#015Validating:  11%|█         | 60/545 [00:03<00:27, 17.44it/s]#033[A#015Epoch 1:  96%|█████████▌| 9780/10238 [09:36<00:27, 16.96it/s, loss=1.28, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.552, val_f1_step=0.493, val_loss_step=0.0857][0m
[34m#015Validating:  17%|█▋        | 90/545 [00:05<00:26, 17.31it/s]#033[A#015Epoch 1:  96%|█████████▌| 9810/10238 [09:38<00:25, 16.96it/s, loss=1.28, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.552, val_f1_step=0.493, val_loss_step=0.0857][0m
[34m#015Validating:  22%|██▏       | 120/545 [00:06<00:24, 17.51it/s]#033[A#015Epoch 1:  96%|█████████▌| 9840/10238 [09:40<00:23, 16.96it/s, loss=1.28, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, train_f1=0.552, val_f1_step=0.493, val_loss_step=0.0857][0m
[34m#015Validating:  28%|██▊       | 150/545 [00:08<00:21, 18.00it/s]#033[A#015Epoch 1:  96%|█████████▋| 9870/10238 [09:41<00:21, 16.97it/s, loss=1.28, v_num=0, val_f1_epoch=0.535, val_loss_epoch=0.159, tra

[34mh=0.147, train_f1=0.537, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  13%|█▎        | 1350/10238 [01:19<08:46, 16.88it/s, loss=1.57, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.537, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  13%|█▎        | 1350/10238 [01:19<08:46, 16.88it/s, loss=1.77, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.615, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  13%|█▎        | 1380/10238 [01:21<08:44, 16.88it/s, loss=1.77, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.615, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  13%|█▎        | 1380/10238 [01:21<08:44, 16.88it/s, loss=1.87, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.541, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  14%|█▍        | 1410/10238 [01:23<08:42, 16.89it/s, loss=1.87, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.541, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 

[34mal_loss_step=0.0831]#015Epoch 2:  26%|██▌       | 2670/10238 [02:38<07:28, 16.88it/s, loss=1.53, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.551, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  26%|██▌       | 2670/10238 [02:38<07:28, 16.88it/s, loss=1.52, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.584, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  26%|██▋       | 2700/10238 [02:39<07:26, 16.89it/s, loss=1.52, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.584, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  26%|██▋       | 2700/10238 [02:39<07:26, 16.89it/s, loss=1.16, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.548, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  27%|██▋       | 2730/10238 [02:41<07:24, 16.89it/s, loss=1.16, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.548, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  27%|██▋       | 2730/10238 [02:41<07:24, 

[34m31]#015Epoch 2:  39%|███▊      | 3960/10238 [03:54<06:11, 16.92it/s, loss=1.23, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.564, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  39%|███▉      | 3990/10238 [03:55<06:09, 16.91it/s, loss=1.23, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.564, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  39%|███▉      | 3990/10238 [03:55<06:09, 16.91it/s, loss=1.56, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.564, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  39%|███▉      | 4020/10238 [03:57<06:07, 16.91it/s, loss=1.56, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.564, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  39%|███▉      | 4020/10238 [03:57<06:07, 16.91it/s, loss=1.09, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.579, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  40%|███▉      | 4050/10238 [03:59<06:05, 16.92it/s, loss=1

[34mtep=0.0831]#015Epoch 2:  51%|█████▏    | 5250/10238 [05:10<04:54, 16.93it/s, loss=1.31, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.556, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  52%|█████▏    | 5280/10238 [05:11<04:52, 16.93it/s, loss=1.31, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.556, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  52%|█████▏    | 5280/10238 [05:11<04:52, 16.93it/s, loss=1.49, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.586, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  52%|█████▏    | 5310/10238 [05:13<04:51, 16.93it/s, loss=1.49, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.586, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  52%|█████▏    | 5310/10238 [05:13<04:51, 16.93it/s, loss=1.35, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.625, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  52%|█████▏    | 5340/10238 [05:15<04:49, 16.93it/s

[34m147, train_f1=0.583, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  64%|██████▍   | 6540/10238 [06:26<03:38, 16.91it/s, loss=1.55, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.583, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  64%|██████▍   | 6540/10238 [06:26<03:38, 16.91it/s, loss=1.58, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.523, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  64%|██████▍   | 6570/10238 [06:28<03:36, 16.91it/s, loss=1.58, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.523, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  64%|██████▍   | 6570/10238 [06:28<03:36, 16.91it/s, loss=1.74, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.625, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  64%|██████▍   | 6600/10238 [06:30<03:35, 16.92it/s, loss=1.74, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.625, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  

[34m loss=1.39, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.583, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  76%|███████▌  | 7800/10238 [07:40<02:24, 16.92it/s, loss=1.39, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.583, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  76%|███████▌  | 7800/10238 [07:40<02:24, 16.92it/s, loss=1.57, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.548, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  76%|███████▋  | 7830/10238 [07:42<02:22, 16.92it/s, loss=1.57, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.548, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  76%|███████▋  | 7830/10238 [07:42<02:22, 16.92it/s, loss=1.31, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.593, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  77%|███████▋  | 7860/10238 [07:44<02:20, 16.92it/s, loss=1.31, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.5

[34m�█████▊ | 9030/10238 [08:53<01:11, 16.93it/s, loss=1.6, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.559, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  88%|████████▊ | 9030/10238 [08:53<01:11, 16.93it/s, loss=1.39, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.567, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  88%|████████▊ | 9060/10238 [08:55<01:09, 16.93it/s, loss=1.39, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.567, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  88%|████████▊ | 9060/10238 [08:55<01:09, 16.93it/s, loss=1.48, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.605, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  89%|████████▉ | 9090/10238 [08:57<01:07, 16.93it/s, loss=1.48, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.605, val_f1_step=0.492, val_loss_step=0.0831]#015Epoch 2:  89%|████████▉ | 9090/10238 [08:57<01:07, 16.93it/s, loss=0.98, v_num=0, val_f1_epo

[34m#015Validating:  28%|██▊       | 150/545 [00:08<00:21, 18.55it/s]#033[A#015Epoch 2:  96%|█████████▋| 9870/10238 [09:40<00:21, 16.99it/s, loss=1.55, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.555, val_f1_step=0.492, val_loss_step=0.0831][0m
[34m#015Validating:  33%|███▎      | 180/545 [00:09<00:19, 18.63it/s]#033[A#015Epoch 2:  97%|█████████▋| 9900/10238 [09:42<00:19, 17.00it/s, loss=1.55, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.555, val_f1_step=0.492, val_loss_step=0.0831][0m
[34m#015Validating:  39%|███▊      | 210/545 [00:11<00:18, 18.52it/s]#033[A#015Epoch 2:  97%|█████████▋| 9930/10238 [09:44<00:18, 17.00it/s, loss=1.55, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, train_f1=0.555, val_f1_step=0.492, val_loss_step=0.0831][0m
[34m#015Validating:  44%|████▍     | 240/545 [00:13<00:16, 18.23it/s]#033[A#015Epoch 2:  97%|█████████▋| 9960/10238 [09:45<00:16, 17.00it/s, loss=1.55, v_num=0, val_f1_epoch=0.565, val_loss_epoch=0.147, t

[34m08:19, 17.00it/s, loss=3.45, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.495]#015Epoch 0:  17%|█▋        | 1710/10169 [01:40<08:17, 16.99it/s, loss=3.45, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.495]#015Epoch 0:  17%|█▋        | 1710/10169 [01:40<08:17, 16.99it/s, loss=3.02, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  17%|█▋        | 1740/10169 [01:42<08:15, 17.00it/s, loss=3.02, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  17%|█▋        | 1740/10169 [01:42<08:15, 17.00it/s, loss=2.93, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.495]#015Epoch 0:  17%|█▋        | 1770/10169 [01:44<08:14, 17.00it/s, loss=2.93, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.495]#015Epoch 0:  17%|█▋        | 1770/10169 [01:44<08:14, 17.00it/s, loss=2.74, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  18%|█▊        | 1800/

[34m 33%|███▎      | 3360/10169 [03:17<06:40, 16.98it/s, loss=2.24, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  33%|███▎      | 3390/10169 [03:19<06:39, 16.99it/s, loss=2.24, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  33%|███▎      | 3390/10169 [03:19<06:39, 16.99it/s, loss=2.88, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  34%|███▎      | 3420/10169 [03:21<06:37, 16.99it/s, loss=2.88, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  34%|███▎      | 3420/10169 [03:21<06:37, 16.99it/s, loss=2.86, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.495]#015Epoch 0:  34%|███▍      | 3450/10169 [03:23<06:35, 16.98it/s, loss=2.86, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.495]#015Epoch 0:  34%|███▍      | 3450/10169 [03:23<06:35, 16.98it/s, loss=2.57, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]

[34m5:04, 16.96it/s, loss=2.63, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  49%|████▉     | 5010/10169 [04:55<05:04, 16.96it/s, loss=2.52, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  50%|████▉     | 5040/10169 [04:57<05:02, 16.97it/s, loss=2.52, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  50%|████▉     | 5040/10169 [04:57<05:02, 16.97it/s, loss=2.49, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  50%|████▉     | 5070/10169 [04:58<05:00, 16.97it/s, loss=2.49, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  50%|████▉     | 5070/10169 [04:58<05:00, 16.97it/s, loss=3.73, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.494]#015Epoch 0:  50%|█████     | 5100/10169 [05:00<04:58, 16.97it/s, loss=3.73, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.494]#015Epoch 0:  50%|█████     | 5100/1

[34m03:30, 16.93it/s, loss=2.62, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.497]#015Epoch 0:  65%|██████▌   | 6630/10169 [06:31<03:29, 16.93it/s, loss=2.62, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.497]#015Epoch 0:  65%|██████▌   | 6630/10169 [06:31<03:29, 16.93it/s, loss=2.97, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  65%|██████▌   | 6660/10169 [06:33<03:27, 16.93it/s, loss=2.97, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  65%|██████▌   | 6660/10169 [06:33<03:27, 16.93it/s, loss=2.64, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.495]#015Epoch 0:  66%|██████▌   | 6690/10169 [06:35<03:25, 16.93it/s, loss=2.64, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.495]#015Epoch 0:  66%|██████▌   | 6690/10169 [06:35<03:25, 16.93it/s, loss=2.76, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  66%|██████▌   | 6720/

[34m169 [08:03<01:56, 16.93it/s, loss=2.83, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  81%|████████  | 8190/10169 [08:03<01:56, 16.93it/s, loss=2.94, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.497]#015Epoch 0:  81%|████████  | 8220/10169 [08:05<01:55, 16.93it/s, loss=2.94, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.497]#015Epoch 0:  81%|████████  | 8220/10169 [08:05<01:55, 16.93it/s, loss=2.63, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  81%|████████  | 8250/10169 [08:07<01:53, 16.93it/s, loss=2.63, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  81%|████████  | 8250/10169 [08:07<01:53, 16.93it/s, loss=3.19, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  81%|████████▏ | 8280/10169 [08:09<01:51, 16.93it/s, loss=3.19, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496]#015Epoch 0:  81%|██████

[34m#015Validating:  10%|▉         | 60/616 [00:03<00:32, 17.18it/s]#033[A#015Epoch 0:  95%|█████████▍| 9630/10169 [09:28<00:31, 16.95it/s, loss=3.09, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496][0m
[34m#015Validating:  15%|█▍        | 90/616 [00:05<00:30, 17.05it/s]#033[A#015Epoch 0:  95%|█████████▍| 9660/10169 [09:29<00:30, 16.95it/s, loss=3.09, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496][0m
[34m#015Validating:  19%|█▉        | 120/616 [00:07<00:29, 17.05it/s]#033[A#015Epoch 0:  95%|█████████▌| 9690/10169 [09:31<00:28, 16.95it/s, loss=3.09, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496][0m
[34m#015Validating:  24%|██▍       | 150/616 [00:08<00:26, 17.34it/s]#033[A#015Epoch 0:  96%|█████████▌| 9720/10169 [09:33<00:26, 16.95it/s, loss=3.09, v_num=0, val_f1_epoch=0.0232, val_loss_epoch=0.705, train_f1=0.496][0m
[34m#015Validating:  29%|██▉       | 180/616 [00:10<00:24, 17.96it/s]#033[A#015Epoch 0:  96%|████████

[34m     | 1320/10169 [01:18<08:46, 16.82it/s, loss=2.67, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  13%|█▎        | 1350/10169 [01:20<08:44, 16.82it/s, loss=2.67, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  13%|█▎        | 1350/10169 [01:20<08:44, 16.82it/s, loss=2.94, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.494, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  14%|█▎        | 1380/10169 [01:21<08:42, 16.83it/s, loss=2.94, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.494, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  14%|█▎        | 1380/10169 [01:21<08:42, 16.83it/s, loss=2.33, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  14%|█▍        | 1410/10169 [01:23<08:40, 16.82it/s, loss=2.33, v_num=0, val_f1_

[34m_loss_step=0.0488]#015Epoch 1:  26%|██▌       | 2640/10169 [02:36<07:25, 16.89it/s, loss=3.19, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  26%|██▋       | 2670/10169 [02:38<07:24, 16.88it/s, loss=3.19, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  26%|██▋       | 2670/10169 [02:38<07:24, 16.88it/s, loss=2.62, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  27%|██▋       | 2700/10169 [02:39<07:22, 16.88it/s, loss=2.62, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  27%|██▋       | 2700/10169 [02:39<07:22, 16.88it/s, loss=2.57, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  27%|██▋       | 2730/10169 [02:41<07:2

[34mh=0.495, val_loss_epoch=0.0896, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  39%|███▉      | 3960/10169 [03:54<06:07, 16.90it/s, loss=2.34, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  39%|███▉      | 3960/10169 [03:54<06:07, 16.90it/s, loss=2.83, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  39%|███▉      | 3990/10169 [03:55<06:05, 16.91it/s, loss=2.83, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  39%|███▉      | 3990/10169 [03:55<06:05, 16.91it/s, loss=2.76, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  40%|███▉      | 4020/10169 [03:57<06:03, 16.91it/s, loss=2.76, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val

[34m1%|█████▏    | 5220/10169 [05:09<04:53, 16.88it/s, loss=2.8, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  52%|█████▏    | 5250/10169 [05:11<04:51, 16.88it/s, loss=2.8, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  52%|█████▏    | 5250/10169 [05:11<04:51, 16.88it/s, loss=2.35, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  52%|█████▏    | 5280/10169 [05:12<04:49, 16.88it/s, loss=2.35, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  52%|█████▏    | 5280/10169 [05:12<04:49, 16.88it/s, loss=2.38, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  52%|█████▏    | 5310/10169 [05:14<04:47, 16.87it/s, loss=2.38, v_num=0, v

[34ms, loss=3.17, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  64%|██████▍   | 6510/10169 [06:25<03:36, 16.89it/s, loss=3.17, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  64%|██████▍   | 6510/10169 [06:25<03:36, 16.89it/s, loss=2.42, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  64%|██████▍   | 6540/10169 [06:27<03:34, 16.89it/s, loss=2.42, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  64%|██████▍   | 6540/10169 [06:27<03:34, 16.89it/s, loss=3.04, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  65%|██████▍   | 6570/10169 [06:29<03:33, 16.88it/s, loss=3.04, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, trai

[34m=0.0896, train_f1=0.494, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  76%|███████▌  | 7740/10169 [07:38<02:23, 16.90it/s, loss=3.4, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.493, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  76%|███████▋  | 7770/10169 [07:39<02:21, 16.90it/s, loss=3.4, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.493, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  76%|███████▋  | 7770/10169 [07:39<02:21, 16.90it/s, loss=2.68, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  77%|███████▋  | 7800/10169 [07:41<02:20, 16.90it/s, loss=2.68, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  77%|███████▋  | 7800/10169 [07:41<02:20, 16.90it/s, loss=2.87, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epo

[34mEpoch 1:  88%|████████▊ | 8970/10169 [08:50<01:10, 16.90it/s, loss=3.1, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488] #015Epoch 1:  89%|████████▊ | 9000/10169 [08:52<01:09, 16.89it/s, loss=3.1, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  89%|████████▊ | 9000/10169 [08:52<01:09, 16.89it/s, loss=2.69, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  89%|████████▉ | 9030/10169 [08:54<01:07, 16.89it/s, loss=2.69, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0488]#015Epoch 1:  89%|████████▉ | 9030/10169 [08:54<01:07, 16.89it/s, loss=2.8, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0488] #015Epoch 1:  89%|████████▉ | 9060/10169 [08:56<01:05, 16.89it/s, loss=2.8,

[34m#015Validating:  24%|██▍       | 150/616 [00:08<00:26, 17.37it/s]#033[A#015Epoch 1:  96%|█████████▌| 9720/10169 [09:34<00:26, 16.91it/s, loss=2.92, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0488][0m
[34m#015Validating:  29%|██▉       | 180/616 [00:10<00:24, 18.02it/s]#033[A#015Epoch 1:  96%|█████████▌| 9750/10169 [09:36<00:24, 16.92it/s, loss=2.92, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0488][0m
[34m#015Validating:  34%|███▍      | 210/616 [00:11<00:22, 17.96it/s]#033[A#015Epoch 1:  96%|█████████▌| 9780/10169 [09:37<00:22, 16.92it/s, loss=2.92, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0896, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0488][0m
[34m#015Validating:  39%|███▉      | 240/616 [00:13<00:21, 17.76it/s]#033[A#015Epoch 1:  96%|█████████▋| 9810/10169 [09:39<00:21, 16.92it/s, loss=2.92, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.089

[34m | 1320/10169 [01:18<08:44, 16.87it/s, loss=2.77, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  13%|█▎        | 1350/10169 [01:20<08:42, 16.87it/s, loss=2.77, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  13%|█▎        | 1350/10169 [01:20<08:42, 16.86it/s, loss=2.61, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  14%|█▎        | 1380/10169 [01:21<08:40, 16.87it/s, loss=2.61, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  14%|█▎        | 1380/10169 [01:21<08:40, 16.87it/s, loss=2.76, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  14%|█▍        | 1410/10169 [01:23<08:40, 16.84it/s, loss=2.76, v_num=0, val_f1_epoc

[34moss_step=0.0483]#015Epoch 2:  26%|██▌       | 2640/10169 [02:35<07:23, 16.97it/s, loss=2.6, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483] #015Epoch 2:  26%|██▋       | 2670/10169 [02:37<07:21, 16.97it/s, loss=2.6, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  26%|██▋       | 2670/10169 [02:37<07:21, 16.97it/s, loss=2.37, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  27%|██▋       | 2700/10169 [02:39<07:20, 16.96it/s, loss=2.37, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  27%|██▋       | 2700/10169 [02:39<07:20, 16.96it/s, loss=2.83, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  27%|██▋       | 2730/10169 [02:40<07:18, 

[34m, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  39%|███▉      | 3960/10169 [03:53<06:05, 16.98it/s, loss=2.54, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  39%|███▉      | 3960/10169 [03:53<06:05, 16.98it/s, loss=2.66, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  39%|███▉      | 3990/10169 [03:54<06:03, 16.98it/s, loss=2.66, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  39%|███▉      | 3990/10169 [03:54<06:03, 16.98it/s, loss=2.56, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  40%|███▉      | 4020/10169 [03:56<06:02, 16.98it/s, loss=2.56, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_s

[34m███▏    | 5220/10169 [05:07<04:51, 16.95it/s, loss=3.09, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  52%|█████▏    | 5250/10169 [05:09<04:50, 16.95it/s, loss=3.09, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  52%|█████▏    | 5250/10169 [05:09<04:50, 16.95it/s, loss=3.09, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  52%|█████▏    | 5280/10169 [05:11<04:48, 16.96it/s, loss=3.09, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  52%|█████▏    | 5280/10169 [05:11<04:48, 16.96it/s, loss=2.47, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.494, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  52%|█████▏    | 5310/10169 [05:13<04:46, 16.95it/s, loss=2.47, v_num=0, val_

[34ms=3, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]   #015Epoch 2:  64%|██████▍   | 6510/10169 [06:24<03:35, 16.94it/s, loss=3, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  64%|██████▍   | 6510/10169 [06:24<03:35, 16.94it/s, loss=3.27, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  64%|██████▍   | 6540/10169 [06:26<03:34, 16.94it/s, loss=3.27, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  64%|██████▍   | 6540/10169 [06:26<03:34, 16.94it/s, loss=2.93, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.494, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  65%|██████▍   | 6570/10169 [06:27<03:32, 16.94it/s, loss=2.93, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.49

[34m898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  76%|███████▌  | 7740/10169 [07:37<02:23, 16.94it/s, loss=2.72, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  76%|███████▋  | 7770/10169 [07:38<02:21, 16.94it/s, loss=2.72, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  76%|███████▋  | 7770/10169 [07:38<02:21, 16.94it/s, loss=3.59, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  77%|███████▋  | 7800/10169 [07:40<02:19, 16.93it/s, loss=3.59, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  77%|███████▋  | 7800/10169 [07:40<02:19, 16.93it/s, loss=3.26, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch

[34m483]#015Epoch 2:  88%|████████▊ | 8970/10169 [08:49<01:10, 16.93it/s, loss=2.89, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  89%|████████▊ | 9000/10169 [08:51<01:09, 16.93it/s, loss=2.89, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.495, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  89%|████████▊ | 9000/10169 [08:51<01:09, 16.93it/s, loss=2.82, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  89%|████████▉ | 9030/10169 [08:53<01:07, 16.93it/s, loss=2.82, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.497, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  89%|████████▉ | 9030/10169 [08:53<01:07, 16.93it/s, loss=2.88, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.496, val_f1_step=0.498, val_loss_step=0.0483]#015Epoch 2:  89%|████████▉ | 9060/10169 [08:55<01:05, 16.93it/s, 

[34m#015Validating:  24%|██▍       | 150/616 [00:08<00:26, 17.91it/s]#033[A#015Epoch 2:  96%|█████████▌| 9720/10169 [09:32<00:26, 16.97it/s, loss=3.27, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.494, val_f1_step=0.498, val_loss_step=0.0483][0m
[34m#015Validating:  29%|██▉       | 180/616 [00:09<00:23, 18.59it/s]#033[A#015Epoch 2:  96%|█████████▌| 9750/10169 [09:34<00:24, 16.98it/s, loss=3.27, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.494, val_f1_step=0.498, val_loss_step=0.0483][0m
[34m#015Validating:  34%|███▍      | 210/616 [00:11<00:21, 18.55it/s]#033[A#015Epoch 2:  96%|█████████▌| 9780/10169 [09:35<00:22, 16.98it/s, loss=3.27, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.0898, train_f1=0.494, val_f1_step=0.498, val_loss_step=0.0483][0m
[34m#015Validating:  39%|███▉      | 240/616 [00:13<00:20, 18.35it/s]#033[A#015Epoch 2:  96%|█████████▋| 9810/10169 [09:37<00:21, 16.98it/s, loss=3.27, v_num=0, val_f1_epoch=0.495, val_loss_epoch=0.089

[34mf1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.499]#015Epoch 0:  36%|███▌      | 1680/4714 [01:38<02:57, 17.14it/s, loss=2.63, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.511]#015Epoch 0:  36%|███▋      | 1710/4714 [01:39<02:55, 17.12it/s, loss=2.63, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.511]#015Epoch 0:  36%|███▋      | 1710/4714 [01:39<02:55, 17.12it/s, loss=3.13, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.503]#015Epoch 0:  37%|███▋      | 1740/4714 [01:41<02:53, 17.13it/s, loss=3.13, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.503]#015Epoch 0:  37%|███▋      | 1740/4714 [01:41<02:53, 17.13it/s, loss=2.99, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.498]#015Epoch 0:  38%|███▊      | 1770/4714 [01:43<02:51, 17.12it/s, loss=2.99, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.498]#015Epoch 0:  38%|███▊      | 1770/4714 [01:43<02:51, 17.12it/s, loss=4.3, v_num=0,

[34m95]#015Epoch 0:  70%|███████   | 3300/4714 [03:14<01:23, 17.00it/s, loss=2.34, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.495]#015Epoch 0:  71%|███████   | 3330/4714 [03:15<01:21, 17.00it/s, loss=2.34, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.495]#015Epoch 0:  71%|███████   | 3330/4714 [03:15<01:21, 17.00it/s, loss=3.48, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.527]#015Epoch 0:  71%|███████▏  | 3360/4714 [03:17<01:19, 17.00it/s, loss=3.48, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.527]#015Epoch 0:  71%|███████▏  | 3360/4714 [03:17<01:19, 17.00it/s, loss=2.85, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.513]#015Epoch 0:  72%|███████▏  | 3390/4714 [03:19<01:17, 17.01it/s, loss=2.85, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.513]#015Epoch 0:  72%|███████▏  | 3390/4714 [03:19<01:17, 17.01it/s, loss=3.32, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_

[34m#015Validating:  82%|████████▏ | 240/292 [00:13<00:02, 17.79it/s]#033[A#015Epoch 0:  99%|█████████▉| 4680/4714 [04:33<00:01, 17.10it/s, loss=2.94, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.514][0m
[34m#015Validating:  92%|█████████▏| 270/292 [00:15<00:01, 17.68it/s]#033[A#015Epoch 0: 100%|█████████▉| 4710/4714 [04:35<00:00, 17.10it/s, loss=2.94, v_num=0, val_f1_epoch=0.0159, val_loss_epoch=0.673, train_f1=0.514][0m
[34m#015Validating: 100%|██████████| 292/292 [00:16<00:00, 17.62it/s]#033[A#015Epoch 0: 100%|██████████| 4714/4714 [04:36<00:00, 17.03it/s, loss=2.61, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.496, val_f1_step=0.505, val_loss_step=0.075][0m
[34m#015                                                             #033[A#015Epoch 0:   0%|          | 0/4714 [00:00<?, ?it/s, loss=2.61, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.496, val_f1_step=0.505, val_loss_step=0.075]           #015Epoch 1:   0%|          |

[34m=1.92, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.499, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  29%|██▊       | 1350/4714 [01:19<03:17, 17.04it/s, loss=1.92, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.499, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  29%|██▊       | 1350/4714 [01:19<03:17, 17.04it/s, loss=1.72, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.497, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  29%|██▉       | 1380/4714 [01:21<03:15, 17.02it/s, loss=1.72, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.497, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  29%|██▉       | 1380/4714 [01:21<03:15, 17.02it/s, loss=2.98, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.514, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  30%|██▉       | 1410/4714 [01:22<03:13, 17.03it/s, loss=2.98, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.514, val_f

[34m:  56%|█████▌    | 2640/4714 [02:34<02:01, 17.06it/s, loss=1.72, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.498, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  56%|█████▌    | 2640/4714 [02:34<02:01, 17.06it/s, loss=1.74, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.508, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  57%|█████▋    | 2670/4714 [02:36<01:59, 17.07it/s, loss=1.74, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.508, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  57%|█████▋    | 2670/4714 [02:36<01:59, 17.07it/s, loss=2.88, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.496, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  57%|█████▋    | 2700/4714 [02:38<01:58, 17.06it/s, loss=2.88, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.496, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  57%|█████▋    | 2700/4714 [02:38<01:58, 17.06it/s, loss=1.97, v_num=0, val_f1

[34m:49<00:47, 17.03it/s, loss=3.03, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.512, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  83%|████████▎ | 3900/4714 [03:49<00:47, 17.03it/s, loss=2.42, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.507, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  83%|████████▎ | 3930/4714 [03:50<00:46, 17.02it/s, loss=2.42, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.507, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  83%|████████▎ | 3930/4714 [03:50<00:46, 17.02it/s, loss=2.62, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.495, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  84%|████████▍ | 3960/4714 [03:52<00:44, 17.02it/s, loss=2.62, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.495, val_f1_step=0.505, val_loss_step=0.075]#015Epoch 1:  84%|████████▍ | 3960/4714 [03:52<00:44, 17.02it/s, loss=3.54, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0

[34m#015Validating:  82%|████████▏ | 240/292 [00:13<00:02, 17.92it/s]#033[A#015Epoch 1:  99%|█████████▉| 4680/4714 [04:33<00:01, 17.11it/s, loss=1.91, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.497, val_f1_step=0.505, val_loss_step=0.075][0m
[34m#015Validating:  92%|█████████▏| 270/292 [00:15<00:01, 18.03it/s]#033[A#015Epoch 1: 100%|█████████▉| 4710/4714 [04:35<00:00, 17.12it/s, loss=1.91, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0872, train_f1=0.497, val_f1_step=0.505, val_loss_step=0.075][0m
[34m#015Validating: 100%|██████████| 292/292 [00:16<00:00, 17.85it/s]#033[A#015Epoch 1: 100%|██████████| 4714/4714 [04:36<00:00, 17.05it/s, loss=2.26, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.496, val_f1_step=0.505, val_loss_step=0.0758][0m
[34m#015                                                             #033[A#015Epoch 1:   0%|          | 0/4714 [00:00<?, ?it/s, loss=2.26, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.496

[34mloss_step=0.0758]#015Epoch 2:  28%|██▊       | 1320/4714 [01:17<03:20, 16.93it/s, loss=3.48, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.495, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  29%|██▊       | 1350/4714 [01:19<03:18, 16.94it/s, loss=3.48, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.495, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  29%|██▊       | 1350/4714 [01:19<03:18, 16.94it/s, loss=1.88, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.506, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  29%|██▉       | 1380/4714 [01:21<03:16, 16.93it/s, loss=1.88, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.506, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  29%|██▉       | 1380/4714 [01:21<03:16, 16.93it/s, loss=1.68, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.495, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  30%|██▉       | 1410/4714 [01:23<03:15, 16.9

[34m�██▌    | 2610/4714 [02:34<02:04, 16.95it/s, loss=3.51, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.512, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  56%|█████▌    | 2640/4714 [02:35<02:02, 16.95it/s, loss=3.51, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.512, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  56%|█████▌    | 2640/4714 [02:35<02:02, 16.95it/s, loss=1.86, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.496, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  57%|█████▋    | 2670/4714 [02:37<02:00, 16.94it/s, loss=1.86, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.496, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  57%|█████▋    | 2670/4714 [02:37<02:00, 16.94it/s, loss=2.49, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.498, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  57%|█████▋    | 2700/4714 [02:39<01:58, 16.95it/s, loss=2.49, v_num=0, val_f1_epo

[34m=0.0758]#015Epoch 2:  82%|████████▏ | 3870/4714 [03:48<00:49, 16.96it/s, loss=2.22, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.498, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  83%|████████▎ | 3900/4714 [03:49<00:47, 16.97it/s, loss=2.22, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.498, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  83%|████████▎ | 3900/4714 [03:49<00:47, 16.97it/s, loss=2.76, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.498, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  83%|████████▎ | 3930/4714 [03:51<00:46, 16.96it/s, loss=2.76, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.498, val_f1_step=0.505, val_loss_step=0.0758]#015Epoch 2:  83%|████████▎ | 3930/4714 [03:51<00:46, 16.96it/s, loss=4, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.495, val_f1_step=0.505, val_loss_step=0.0758]   #015Epoch 2:  84%|████████▍ | 3960/4714 [03:53<00:44, 16.96it/s, lo

[34m#015Validating:  82%|████████▏ | 240/292 [00:13<00:02, 18.49it/s]#033[A#015Epoch 2:  99%|█████████▉| 4680/4714 [04:33<00:01, 17.08it/s, loss=2.12, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.498, val_f1_step=0.505, val_loss_step=0.0758][0m
[34m#015Validating:  92%|█████████▏| 270/292 [00:14<00:01, 18.62it/s]#033[A#015Epoch 2: 100%|█████████▉| 4710/4714 [04:35<00:00, 17.09it/s, loss=2.12, v_num=0, val_f1_epoch=0.501, val_loss_epoch=0.0879, train_f1=0.498, val_f1_step=0.505, val_loss_step=0.0758][0m
[34m#015Validating: 100%|██████████| 292/292 [00:15<00:00, 18.41it/s]#033[A#015Epoch 2: 100%|██████████| 4714/4714 [04:36<00:00, 17.03it/s, loss=2.55, v_num=0, val_f1_epoch=0.502, val_loss_epoch=0.0871, train_f1=0.496, val_f1_step=0.505, val_loss_step=0.0757][0m
[34m#015                                                             #033[A#015Epoch 2: 100%|██████████| 4714/4714 [04:54<00:00, 16.01it/s, loss=2.55, v_num=0, val_f1_epoch=0.502, val_loss_epoch=0.0871, t

[34mh=0.143, val_loss_epoch=0.664, train_f1=0.646]#015Epoch 0:  45%|████▍     | 1680/3747 [01:38<02:01, 17.07it/s, loss=1.46, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.588]#015Epoch 0:  46%|████▌     | 1710/3747 [01:40<01:59, 17.07it/s, loss=1.46, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.588]#015Epoch 0:  46%|████▌     | 1710/3747 [01:40<01:59, 17.07it/s, loss=1.26, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.556]#015Epoch 0:  46%|████▋     | 1740/3747 [01:41<01:57, 17.08it/s, loss=1.26, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.556]#015Epoch 0:  46%|████▋     | 1740/3747 [01:41<01:57, 17.08it/s, loss=1.45, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.585]#015Epoch 0:  47%|████▋     | 1770/3747 [01:43<01:55, 17.07it/s, loss=1.45, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.585]#015Epoch 0:  47%|████▋     | 1770/3747 [01:43<01:55, 17.07it/s, loss=2.32, v_num=0, val_f1_epoch

[34mch=0.664, train_f1=0.591]#015Epoch 0:  88%|████████▊ | 3300/3747 [03:13<00:26, 17.02it/s, loss=1.48, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.591]#015Epoch 0:  88%|████████▊ | 3300/3747 [03:13<00:26, 17.02it/s, loss=1.99, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.662]#015Epoch 0:  89%|████████▉ | 3330/3747 [03:15<00:24, 17.03it/s, loss=1.99, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.662]#015Epoch 0:  89%|████████▉ | 3330/3747 [03:15<00:24, 17.03it/s, loss=0.783, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.592]#015Epoch 0:  90%|████████▉ | 3360/3747 [03:17<00:22, 17.03it/s, loss=0.783, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.592]#015Epoch 0:  90%|████████▉ | 3360/3747 [03:17<00:22, 17.03it/s, loss=1.66, v_num=0, val_f1_epoch=0.143, val_loss_epoch=0.664, train_f1=0.622] #015Epoch 0:  90%|█████████ | 3390/3747 [03:19<00:20, 17.03it/s, loss=1.66, v_num=0, val_f1_epoch=0.143, val_loss_e

[34m_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.636, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  36%|███▌      | 1350/3747 [01:19<02:20, 17.02it/s, loss=1.28, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.636, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  36%|███▌      | 1350/3747 [01:19<02:20, 17.02it/s, loss=1.49, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.696, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  37%|███▋      | 1380/3747 [01:21<02:19, 17.02it/s, loss=1.49, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.696, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  37%|███▋      | 1380/3747 [01:21<02:19, 17.02it/s, loss=1.13, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.689, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  38%|███▊      | 1410/3747 [01:22<02:17, 17.02it/s, loss=1.13, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.689, val_f1_step=0.836, 

[34m06, val_loss_epoch=0.305, train_f1=0.722, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  70%|███████   | 2640/3747 [02:34<01:04, 17.07it/s, loss=1.31, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.722, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  70%|███████   | 2640/3747 [02:34<01:04, 17.07it/s, loss=1.45, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.759, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  71%|███████▏  | 2670/3747 [02:36<01:03, 17.06it/s, loss=1.45, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.759, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  71%|███████▏  | 2670/3747 [02:36<01:03, 17.06it/s, loss=1.51, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.705, val_f1_step=0.836, val_loss_step=0.198]#015Epoch 1:  72%|███████▏  | 2700/3747 [02:38<01:01, 17.07it/s, loss=1.51, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.705, val_f1_step=0.836, val_loss_step=0.198]#015

[34m#015Validating:  30%|███       | 60/198 [00:03<00:07, 17.76it/s]#033[A#015Epoch 1:  97%|█████████▋| 3630/3747 [03:31<00:06, 17.13it/s, loss=1.69, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.690, val_f1_step=0.836, val_loss_step=0.198][0m
[34m#015Validating:  45%|████▌     | 90/198 [00:05<00:06, 17.63it/s]#033[A#015Epoch 1:  98%|█████████▊| 3660/3747 [03:33<00:05, 17.14it/s, loss=1.69, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.690, val_f1_step=0.836, val_loss_step=0.198][0m
[34m#015Validating:  61%|██████    | 120/198 [00:06<00:04, 17.39it/s]#033[A#015Epoch 1:  98%|█████████▊| 3690/3747 [03:35<00:03, 17.13it/s, loss=1.69, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0.690, val_f1_step=0.836, val_loss_step=0.198][0m
[34m#015Validating:  76%|███████▌  | 150/198 [00:08<00:02, 17.20it/s]#033[A#015Epoch 1:  99%|█████████▉| 3720/3747 [03:37<00:01, 17.13it/s, loss=1.69, v_num=0, val_f1_epoch=0.706, val_loss_epoch=0.305, train_f1=0

[34moss=1.04, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.758, val_f1_step=0.816, val_loss_step=0.188]#015Epoch 2:  36%|███▌      | 1350/3747 [01:19<02:20, 17.01it/s, loss=1.04, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.758, val_f1_step=0.816, val_loss_step=0.188]#015Epoch 2:  36%|███▌      | 1350/3747 [01:19<02:20, 17.01it/s, loss=1.13, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.767, val_f1_step=0.816, val_loss_step=0.188]#015Epoch 2:  37%|███▋      | 1380/3747 [01:21<02:19, 17.00it/s, loss=1.13, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.767, val_f1_step=0.816, val_loss_step=0.188]#015Epoch 2:  37%|███▋      | 1380/3747 [01:21<02:19, 17.00it/s, loss=1.08, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.710, val_f1_step=0.816, val_loss_step=0.188]#015Epoch 2:  38%|███▊      | 1410/3747 [01:22<02:17, 17.00it/s, loss=1.08, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.710, val_f1_s

[34m.28, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.734, val_f1_step=0.816, val_loss_step=0.188] #015Epoch 2:  70%|███████   | 2640/3747 [02:35<01:05, 16.95it/s, loss=1.28, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.734, val_f1_step=0.816, val_loss_step=0.188]#015Epoch 2:  70%|███████   | 2640/3747 [02:35<01:05, 16.95it/s, loss=0.995, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.751, val_f1_step=0.816, val_loss_step=0.188]#015Epoch 2:  71%|███████▏  | 2670/3747 [02:37<01:03, 16.94it/s, loss=0.995, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.751, val_f1_step=0.816, val_loss_step=0.188]#015Epoch 2:  71%|███████▏  | 2670/3747 [02:37<01:03, 16.94it/s, loss=1.32, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.728, val_f1_step=0.816, val_loss_step=0.188] #015Epoch 2:  72%|███████▏  | 2700/3747 [02:39<01:01, 16.95it/s, loss=1.32, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.728, val_f1_st

[34m#015Validating:  30%|███       | 60/198 [00:03<00:07, 17.46it/s]#033[A#015Epoch 2:  97%|█████████▋| 3630/3747 [03:32<00:06, 17.07it/s, loss=1.46, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.756, val_f1_step=0.816, val_loss_step=0.188][0m
[34m#015Validating:  45%|████▌     | 90/198 [00:05<00:06, 17.39it/s]#033[A#015Epoch 2:  98%|█████████▊| 3660/3747 [03:34<00:05, 17.07it/s, loss=1.46, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.756, val_f1_step=0.816, val_loss_step=0.188][0m
[34m#015Validating:  61%|██████    | 120/198 [00:06<00:04, 17.22it/s]#033[A#015Epoch 2:  98%|█████████▊| 3690/3747 [03:36<00:03, 17.07it/s, loss=1.46, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0.756, val_f1_step=0.816, val_loss_step=0.188][0m
[34m#015Validating:  76%|███████▌  | 150/198 [00:08<00:02, 17.12it/s]#033[A#015Epoch 2:  99%|█████████▉| 3720/3747 [03:37<00:01, 17.07it/s, loss=1.46, v_num=0, val_f1_epoch=0.735, val_loss_epoch=0.293, train_f1=0


2021-10-27 13:26:11 Uploading - Uploading generated training model
2021-10-27 13:30:12 Completed - Training job completed
Training seconds: 14115
Billable seconds: 14115


In [31]:
# Use the whole train/validation frame as the training set. tot_df is read from
# train_val_all.csv in the data-loading cell. This is a plain name binding, not a
# copy: in-place changes made through either name are visible through both.
train_df = tot_df

In [30]:
# Show the tags that occur in both tags_2d and tags_1d (displayed as the cell's
# last expression).
# NOTE(review): neither collection is defined in this part of the notebook; the
# `.intersection` call and the displayed result suggest tags_2d is a set — confirm.
tags_2d.intersection(tags_1d)

{'Capacities & Response->Number Of People Reached/Response Gaps',
 'Context->Economy',
 'Covid-19->Restriction Measures'}

In [21]:
# Hard stop so that "Run All" does not continue into the model-loading and
# SageMaker deployment cells below.
# NOTE(review): the original was a bare always-false `assert (1==2)`; the intent
# (a deliberate barrier) is presumed from its position — confirm.
# An explicit raise keeps the AssertionError type, explains why execution
# stopped, and is not removed when Python runs with -O.
raise AssertionError(
    "Intentional stop: the cells below load and deploy a model; run them manually."
)


AssertionError: 

## Code used for deploying and testing models:

In [None]:
import mlflow
import torch

In [None]:
# Load the logged PyTorch model from the MLflow artifact location on S3.
# map_location is passed through to the underlying loader so the weights are
# mapped onto the CPU.
model = mlflow.pytorch.load_model(
    model_uri='s3://deep-mlflow-artifact/16/21a5ece6091b4ddf8b223e78159ce1c7/artifacts/pytorch_model_all',
    map_location=torch.device('cpu'),
)

In [None]:
# Run the loaded model on the `excerpt` column; the result is displayed as the
# cell's last expression.
# NOTE(review): `test_data` is not defined in the visible part of this notebook —
# the data-loading cell names the test split `test_df`. Confirm the name exists
# before running.
# NOTE(review): `predict` is presumably provided by the project's model class; it
# is not shown here — confirm the expected input type.
model.predict(test_data['excerpt'])

In [None]:
# NOTE(review): this rebinds the name `sagemaker`. The notebook's first import
# cell binds it to the AWS SageMaker SDK (`import sagemaker`); after this cell
# runs, `sagemaker` refers to MLflow's SageMaker deployment module instead.
from mlflow import sagemaker

In [None]:
# Deploy the MLflow-logged model as a SageMaker endpoint.
# The call goes through `mlflow.sagemaker` instead of the bare name `sagemaker`:
# the notebook's first import cell binds `sagemaker` to the AWS SDK, so the bare
# name only points at MLflow's module once a later rebinding import has run.
# NOTE(review): SAGEMAKER_ROLE_ARN is not defined in this cell — presumably it
# comes from the star import of deep.constants; confirm.
mlflow.sagemaker.deploy(
    app_name='testcpu-pytorch-trained-gpu',
    model_uri='s3://deep-mlflow-artifact/16/21a5ece6091b4ddf8b223e78159ce1c7/artifacts/pytorch_model_all',
    execution_role_arn=SAGEMAKER_ROLE_ARN,
    image_url="961104659532.dkr.ecr.us-east-1.amazonaws.com/mlflow-pyfunc:latest",
    region_name="us-east-1",
    instance_type="ml.c4.xlarge",
    # Return immediately instead of waiting for the endpoint to finish deploying.
    synchronous=False,
    # Keep superseded endpoint resources; MLflow requires archive=True whenever
    # the deployment is not synchronous.
    archive=True,
)