# Named Entity Recognition Using BERT

### Required packages
* pytorch-pretrained-bert
* pandas
* seqeval

In [22]:
import sys
import os
import yaml
import random
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
from seqeval.metrics import f1_score

import torch

from pytorch_pretrained_bert.tokenization import BertTokenizer
from torch.optim import Adam

bert_utils_path = os.path.abspath('../../utils_nlp/bert')
if bert_utils_path not in sys.path:
    sys.path.insert(0, bert_utils_path)

from configs import BERTFineTuneConfig
from bert_data_utils import KaggleNERProcessor
from bert_utils import (BertTokenClassifier, 
                        convert_examples_to_token_features, 
                        print_dict, 
                        get_device)


In [2]:
config_file = "config.yaml"

## Configurations

In [3]:
with open(config_file, 'r') as ymlfile:
    config_dict = yaml.safe_load(ymlfile)

print_dict(config_dict)

{TrainConfig: {'batch_size': 32, 'num_train_epochs': 3}
ModelConfig: {'bert_model': 'bert-base-uncased', 'max_seq_length': 75, 'do_lower_case': True}
OptimizerConfig: {'optimizer_name': 'BertAdam', 'learning_rate': 3e-05, 'no_decay_params': ['bias', 'gamma', 'beta'], 'params_weight_decay': 0.01, 'clip_gradient': True, 'max_gradient_norm': 1.0}}


In [4]:
config = BERTFineTuneConfig(config_dict)

## Preprocess Data

### Create training and validation examples
KaggleNERProcessor is a dataset specific class that generates training and evaluation examples in the format accepted by all utility functions. 

In [5]:
kaggle_ner_processor = KaggleNERProcessor(data_dir="./data/NER/ner_dataset.csv", dev_percentage = 0.1)

In [6]:
train_examples = kaggle_ner_processor.get_train_examples(data_dir="./data/NER/ner_dataset.csv")
dev_examples = kaggle_ner_processor.get_dev_examples(data_dir="./data/NER/ner_dataset.csv")
label_list = kaggle_ner_processor.get_labels()

In [7]:
print('Sample sentence: \n{}\n'.format(train_examples[0].text_a))
print('Sample sentence labels: \n{}\n'.format(train_examples[1].label))

Sample sentence: 
Thousands of demonstrators have marched through London to protest the war in Iraq and demand the withdrawal of British troops from that country .

Sample sentence labels: 
['B-gpe', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-tim', 'O', 'O', 'O', 'B-org', 'O', 'O', 'O', 'O', 'O']



### Convert examples to features
The function `convert_examples_to_token_features` converts raw string data to numerical features, involving the following steps:
1. Tokenization
2. Convert tokens and labels to numerical values
3. Sequence padding or truncation

In [8]:
label_map = {label: i for i, label in enumerate(label_list)}

In [9]:
tokenizer = BertTokenizer.from_pretrained(config.bert_model,
                                          do_lower_case=config.do_lower_case)

train_features = convert_examples_to_token_features(examples=train_examples,
                                                    tokenizer=tokenizer,
                                                    label_map=label_map, 
                                                    config=config)
dev_features = convert_examples_to_token_features(examples=dev_examples,
                                                  tokenizer=tokenizer,
                                                  label_map=label_map, 
                                                  config=config)

In [10]:
print("Sample token id:\n{}\n".format(train_features[0].input_ids))
print("Sample attention mask:\n{}\n".format(train_features[0].input_mask))
print("Sample label ids:\n{}\n".format(train_features[0].label_id))

Sample token id:
[5190, 1997, 28337, 2031, 9847, 2083, 2414, 2000, 6186, 1996, 2162, 1999, 5712, 1998, 5157, 1996, 10534, 1997, 2329, 3629, 2013, 2008, 2406, 1012, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

Sample attention mask:
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

Sample label ids:
[6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,

## Create Token Classifier

In [11]:
device, n_gpu = get_device()
token_classifier = BertTokenClassifier(config=config, 
                                       label_map=label_map, 
                                       device=device, 
                                       n_gpu=n_gpu)

BERT fine tune configurations:
batch_size=32
num_train_epochs=3
bert_model=bert-base-uncased
max_seq_length=75
do_lower_case=True
optimizer_name=BertAdam
learning_rate=3e-05
no_decay_params=['bias', 'gamma', 'beta']
params_weight_decay=0.01
clip_gradient=True
max_gradient_norm=1.0


t_total value of -1 results in schedule not being applied


## Train Model

In [12]:
token_classifier.fit(train_features)

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]
Iteration:   0%|          | 0/1349 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/1349 [00:01<40:19,  1.79s/it][A
Iteration:   0%|          | 2/1349 [00:02<34:49,  1.55s/it][A
Iteration:   0%|          | 3/1349 [00:03<30:56,  1.38s/it][A
Iteration:   0%|          | 4/1349 [00:04<28:18,  1.26s/it][A
Iteration:   0%|          | 5/1349 [00:05<26:24,  1.18s/it][A
Iteration:   0%|          | 6/1349 [00:06<25:10,  1.12s/it][A
Iteration:   1%|          | 7/1349 [00:07<24:12,  1.08s/it][A
Iteration:   1%|          | 8/1349 [00:08<23:31,  1.05s/it][A
Iteration:   1%|          | 9/1349 [00:09<22:59,  1.03s/it][A
Iteration:   1%|          | 10/1349 [00:10<22:39,  1.02s/it][A
Iteration:   1%|          | 11/1349 [00:11<22:24,  1.00s/it][A
Iteration:   1%|          | 12/1349 [00:12<22:14,  1.00it/s][A
Iteration:   1%|          | 13/1349 [00:13<22:08,  1.01it/s][A
Iteration:   1%|          | 14/1349 [00:14<22:02,  1.01it/s][A
Iteration:   

Iteration:   9%|▉         | 127/1349 [02:07<20:07,  1.01it/s][A
Iteration:   9%|▉         | 128/1349 [02:08<20:11,  1.01it/s][A
Iteration:  10%|▉         | 129/1349 [02:09<20:10,  1.01it/s][A
Iteration:  10%|▉         | 130/1349 [02:10<20:45,  1.02s/it][A
Iteration:  10%|▉         | 131/1349 [02:11<20:33,  1.01s/it][A
Iteration:  10%|▉         | 132/1349 [02:12<20:31,  1.01s/it][A
Iteration:  10%|▉         | 133/1349 [02:13<20:21,  1.00s/it][A
Iteration:  10%|▉         | 134/1349 [02:14<20:19,  1.00s/it][A
Iteration:  10%|█         | 135/1349 [02:15<20:19,  1.00s/it][A
Iteration:  10%|█         | 136/1349 [02:16<20:15,  1.00s/it][A
Iteration:  10%|█         | 137/1349 [02:17<20:09,  1.00it/s][A
Iteration:  10%|█         | 138/1349 [02:18<20:05,  1.00it/s][A
Iteration:  10%|█         | 139/1349 [02:19<20:03,  1.01it/s][A
Iteration:  10%|█         | 140/1349 [02:20<20:03,  1.00it/s][A
Iteration:  10%|█         | 141/1349 [02:21<20:04,  1.00it/s][A
Iteration:  11%|█        

Iteration:  19%|█▉        | 253/1349 [04:13<18:13,  1.00it/s][A
Iteration:  19%|█▉        | 254/1349 [04:14<18:10,  1.00it/s][A
Iteration:  19%|█▉        | 255/1349 [04:15<18:11,  1.00it/s][A
Iteration:  19%|█▉        | 256/1349 [04:16<18:11,  1.00it/s][A
Iteration:  19%|█▉        | 257/1349 [04:17<18:09,  1.00it/s][A
Iteration:  19%|█▉        | 258/1349 [04:18<18:06,  1.00it/s][A
Iteration:  19%|█▉        | 259/1349 [04:19<18:03,  1.01it/s][A
Iteration:  19%|█▉        | 260/1349 [04:20<18:02,  1.01it/s][A
Iteration:  19%|█▉        | 261/1349 [04:21<18:01,  1.01it/s][A
Iteration:  19%|█▉        | 262/1349 [04:22<18:02,  1.00it/s][A
Iteration:  19%|█▉        | 263/1349 [04:23<18:03,  1.00it/s][A
Iteration:  20%|█▉        | 264/1349 [04:24<18:04,  1.00it/s][A
Iteration:  20%|█▉        | 265/1349 [04:25<18:03,  1.00it/s][A
Iteration:  20%|█▉        | 266/1349 [04:26<18:03,  1.00s/it][A
Iteration:  20%|█▉        | 267/1349 [04:27<17:57,  1.00it/s][A
Iteration:  20%|█▉       

Iteration:  28%|██▊       | 379/1349 [06:19<16:26,  1.02s/it][A
Iteration:  28%|██▊       | 380/1349 [06:20<16:20,  1.01s/it][A
Iteration:  28%|██▊       | 381/1349 [06:21<16:48,  1.04s/it][A
Iteration:  28%|██▊       | 382/1349 [06:22<16:34,  1.03s/it][A
Iteration:  28%|██▊       | 383/1349 [06:23<16:23,  1.02s/it][A
Iteration:  28%|██▊       | 384/1349 [06:24<16:15,  1.01s/it][A
Iteration:  29%|██▊       | 385/1349 [06:25<16:12,  1.01s/it][A
Iteration:  29%|██▊       | 386/1349 [06:26<16:06,  1.00s/it][A
Iteration:  29%|██▊       | 387/1349 [06:27<16:30,  1.03s/it][A
Iteration:  29%|██▉       | 388/1349 [06:28<16:19,  1.02s/it][A
Iteration:  29%|██▉       | 389/1349 [06:29<16:11,  1.01s/it][A
Iteration:  29%|██▉       | 390/1349 [06:30<16:07,  1.01s/it][A
Iteration:  29%|██▉       | 391/1349 [06:31<16:01,  1.00s/it][A
Iteration:  29%|██▉       | 392/1349 [06:32<15:59,  1.00s/it][A
Iteration:  29%|██▉       | 393/1349 [06:34<16:28,  1.03s/it][A
Iteration:  29%|██▉      

Iteration:  37%|███▋      | 505/1349 [08:27<14:27,  1.03s/it][A
Iteration:  38%|███▊      | 506/1349 [08:28<14:21,  1.02s/it][A
Iteration:  38%|███▊      | 507/1349 [08:29<14:14,  1.02s/it][A
Iteration:  38%|███▊      | 508/1349 [08:30<14:31,  1.04s/it][A
Iteration:  38%|███▊      | 509/1349 [08:31<14:19,  1.02s/it][A
Iteration:  38%|███▊      | 510/1349 [08:32<14:10,  1.01s/it][A
Iteration:  38%|███▊      | 511/1349 [08:33<14:06,  1.01s/it][A
Iteration:  38%|███▊      | 512/1349 [08:34<14:03,  1.01s/it][A
Iteration:  38%|███▊      | 513/1349 [08:35<14:26,  1.04s/it][A
Iteration:  38%|███▊      | 514/1349 [08:36<14:15,  1.02s/it][A
Iteration:  38%|███▊      | 515/1349 [08:37<14:09,  1.02s/it][A
Iteration:  38%|███▊      | 516/1349 [08:38<14:04,  1.01s/it][A
Iteration:  38%|███▊      | 517/1349 [08:39<14:03,  1.01s/it][A
Iteration:  38%|███▊      | 518/1349 [08:40<14:02,  1.01s/it][A
Iteration:  38%|███▊      | 519/1349 [08:41<13:56,  1.01s/it][A
Iteration:  39%|███▊     

Iteration:  47%|████▋     | 631/1349 [10:34<12:00,  1.00s/it][A
Iteration:  47%|████▋     | 632/1349 [10:35<12:00,  1.00s/it][A
Iteration:  47%|████▋     | 633/1349 [10:36<12:01,  1.01s/it][A
Iteration:  47%|████▋     | 634/1349 [10:37<11:58,  1.01s/it][A
Iteration:  47%|████▋     | 635/1349 [10:38<11:59,  1.01s/it][A
Iteration:  47%|████▋     | 636/1349 [10:39<12:00,  1.01s/it][A
Iteration:  47%|████▋     | 637/1349 [10:41<12:18,  1.04s/it][A
Iteration:  47%|████▋     | 638/1349 [10:42<12:09,  1.03s/it][A
Iteration:  47%|████▋     | 639/1349 [10:43<12:05,  1.02s/it][A
Iteration:  47%|████▋     | 640/1349 [10:44<12:00,  1.02s/it][A
Iteration:  48%|████▊     | 641/1349 [10:45<11:55,  1.01s/it][A
Iteration:  48%|████▊     | 642/1349 [10:46<11:52,  1.01s/it][A
Iteration:  48%|████▊     | 643/1349 [10:47<11:50,  1.01s/it][A
Iteration:  48%|████▊     | 644/1349 [10:48<11:49,  1.01s/it][A
Iteration:  48%|████▊     | 645/1349 [10:49<11:47,  1.01s/it][A
Iteration:  48%|████▊    

Iteration:  56%|█████▌    | 757/1349 [12:42<09:57,  1.01s/it][A
Iteration:  56%|█████▌    | 758/1349 [12:43<09:56,  1.01s/it][A
Iteration:  56%|█████▋    | 759/1349 [12:44<09:55,  1.01s/it][A
Iteration:  56%|█████▋    | 760/1349 [12:45<10:09,  1.04s/it][A
Iteration:  56%|█████▋    | 761/1349 [12:46<10:02,  1.02s/it][A
Iteration:  56%|█████▋    | 762/1349 [12:47<10:14,  1.05s/it][A
Iteration:  57%|█████▋    | 763/1349 [12:48<10:25,  1.07s/it][A
Iteration:  57%|█████▋    | 764/1349 [12:49<10:13,  1.05s/it][A
Iteration:  57%|█████▋    | 765/1349 [12:50<10:05,  1.04s/it][A
Iteration:  57%|█████▋    | 766/1349 [12:51<09:59,  1.03s/it][A
Iteration:  57%|█████▋    | 767/1349 [12:52<09:54,  1.02s/it][A
Iteration:  57%|█████▋    | 768/1349 [12:53<09:49,  1.01s/it][A
Iteration:  57%|█████▋    | 769/1349 [12:54<10:03,  1.04s/it][A
Iteration:  57%|█████▋    | 770/1349 [12:56<10:15,  1.06s/it][A
Iteration:  57%|█████▋    | 771/1349 [12:57<10:02,  1.04s/it][A
Iteration:  57%|█████▋   

Iteration:  65%|██████▌   | 883/1349 [14:50<07:52,  1.01s/it][A
Iteration:  66%|██████▌   | 884/1349 [14:51<07:49,  1.01s/it][A
Iteration:  66%|██████▌   | 885/1349 [14:52<07:47,  1.01s/it][A
Iteration:  66%|██████▌   | 886/1349 [14:53<07:46,  1.01s/it][A
Iteration:  66%|██████▌   | 887/1349 [14:54<07:45,  1.01s/it][A
Iteration:  66%|██████▌   | 888/1349 [14:55<07:44,  1.01s/it][A
Iteration:  66%|██████▌   | 889/1349 [14:56<07:42,  1.01s/it][A
Iteration:  66%|██████▌   | 890/1349 [14:57<07:42,  1.01s/it][A
Iteration:  66%|██████▌   | 891/1349 [14:58<07:40,  1.01s/it][A
Iteration:  66%|██████▌   | 892/1349 [14:59<07:40,  1.01s/it][A
Iteration:  66%|██████▌   | 893/1349 [15:00<07:38,  1.01s/it][A
Iteration:  66%|██████▋   | 894/1349 [15:01<07:37,  1.01s/it][A
Iteration:  66%|██████▋   | 895/1349 [15:02<07:37,  1.01s/it][A
Iteration:  66%|██████▋   | 896/1349 [15:03<07:36,  1.01s/it][A
Iteration:  66%|██████▋   | 897/1349 [15:04<07:33,  1.00s/it][A
Iteration:  67%|██████▋  

Iteration:  75%|███████▍  | 1008/1349 [16:56<05:53,  1.04s/it][A
Iteration:  75%|███████▍  | 1009/1349 [16:57<05:49,  1.03s/it][A
Iteration:  75%|███████▍  | 1010/1349 [16:58<05:46,  1.02s/it][A
Iteration:  75%|███████▍  | 1011/1349 [16:59<05:43,  1.02s/it][A
Iteration:  75%|███████▌  | 1012/1349 [17:00<05:41,  1.01s/it][A
Iteration:  75%|███████▌  | 1013/1349 [17:01<05:40,  1.01s/it][A
Iteration:  75%|███████▌  | 1014/1349 [17:02<05:38,  1.01s/it][A
Iteration:  75%|███████▌  | 1015/1349 [17:03<05:36,  1.01s/it][A
Iteration:  75%|███████▌  | 1016/1349 [17:05<05:44,  1.04s/it][A
Iteration:  75%|███████▌  | 1017/1349 [17:06<05:41,  1.03s/it][A
Iteration:  75%|███████▌  | 1018/1349 [17:07<05:38,  1.02s/it][A
Iteration:  76%|███████▌  | 1019/1349 [17:08<05:36,  1.02s/it][A
Iteration:  76%|███████▌  | 1020/1349 [17:09<05:33,  1.01s/it][A
Iteration:  76%|███████▌  | 1021/1349 [17:10<05:32,  1.01s/it][A
Iteration:  76%|███████▌  | 1022/1349 [17:11<05:30,  1.01s/it][A
Iteration:

Iteration:  84%|████████▍ | 1132/1349 [19:02<03:40,  1.01s/it][A
Iteration:  84%|████████▍ | 1133/1349 [19:03<03:37,  1.01s/it][A
Iteration:  84%|████████▍ | 1134/1349 [19:04<03:37,  1.01s/it][A
Iteration:  84%|████████▍ | 1135/1349 [19:05<03:35,  1.01s/it][A
Iteration:  84%|████████▍ | 1136/1349 [19:06<03:34,  1.01s/it][A
Iteration:  84%|████████▍ | 1137/1349 [19:07<03:33,  1.01s/it][A
Iteration:  84%|████████▍ | 1138/1349 [19:08<03:32,  1.01s/it][A
Iteration:  84%|████████▍ | 1139/1349 [19:09<03:31,  1.01s/it][A
Iteration:  85%|████████▍ | 1140/1349 [19:10<03:30,  1.01s/it][A
Iteration:  85%|████████▍ | 1141/1349 [19:11<03:29,  1.01s/it][A
Iteration:  85%|████████▍ | 1142/1349 [19:12<03:28,  1.01s/it][A
Iteration:  85%|████████▍ | 1143/1349 [19:13<03:28,  1.01s/it][A
Iteration:  85%|████████▍ | 1144/1349 [19:14<03:26,  1.01s/it][A
Iteration:  85%|████████▍ | 1145/1349 [19:15<03:25,  1.01s/it][A
Iteration:  85%|████████▍ | 1146/1349 [19:16<03:23,  1.00s/it][A
Iteration:

Iteration:  93%|█████████▎| 1256/1349 [21:07<01:33,  1.01s/it][A
Iteration:  93%|█████████▎| 1257/1349 [21:08<01:32,  1.00s/it][A
Iteration:  93%|█████████▎| 1258/1349 [21:09<01:31,  1.01s/it][A
Iteration:  93%|█████████▎| 1259/1349 [21:10<01:30,  1.01s/it][A
Iteration:  93%|█████████▎| 1260/1349 [21:11<01:31,  1.03s/it][A
Iteration:  93%|█████████▎| 1261/1349 [21:13<01:33,  1.06s/it][A
Iteration:  94%|█████████▎| 1262/1349 [21:14<01:30,  1.04s/it][A
Iteration:  94%|█████████▎| 1263/1349 [21:15<01:31,  1.06s/it][A
Iteration:  94%|█████████▎| 1264/1349 [21:16<01:28,  1.04s/it][A
Iteration:  94%|█████████▍| 1265/1349 [21:17<01:26,  1.03s/it][A
Iteration:  94%|█████████▍| 1266/1349 [21:18<01:25,  1.02s/it][A
Iteration:  94%|█████████▍| 1267/1349 [21:19<01:23,  1.02s/it][A
Iteration:  94%|█████████▍| 1268/1349 [21:20<01:22,  1.01s/it][A
Iteration:  94%|█████████▍| 1269/1349 [21:21<01:20,  1.01s/it][A
Iteration:  94%|█████████▍| 1270/1349 [21:22<01:19,  1.01s/it][A
Iteration:

Train loss: 0.132791081621547



Iteration:   0%|          | 1/1349 [00:01<22:39,  1.01s/it][A
Iteration:   0%|          | 2/1349 [00:02<22:37,  1.01s/it][A
Iteration:   0%|          | 3/1349 [00:03<22:35,  1.01s/it][A
Iteration:   0%|          | 4/1349 [00:04<22:34,  1.01s/it][A
Iteration:   0%|          | 5/1349 [00:05<22:36,  1.01s/it][A
Iteration:   0%|          | 6/1349 [00:06<22:31,  1.01s/it][A
Iteration:   1%|          | 7/1349 [00:07<22:26,  1.00s/it][A
Iteration:   1%|          | 8/1349 [00:08<22:25,  1.00s/it][A
Iteration:   1%|          | 9/1349 [00:09<23:07,  1.04s/it][A
Iteration:   1%|          | 10/1349 [00:10<22:52,  1.02s/it][A
Iteration:   1%|          | 11/1349 [00:11<22:43,  1.02s/it][A
Iteration:   1%|          | 12/1349 [00:12<22:35,  1.01s/it][A
Iteration:   1%|          | 13/1349 [00:13<22:30,  1.01s/it][A
Iteration:   1%|          | 14/1349 [00:14<23:03,  1.04s/it][A
Iteration:   1%|          | 15/1349 [00:15<22:52,  1.03s/it][A
Iteration:   1%|          | 16/1349 [00:16<22:39

Iteration:   9%|▉         | 128/1349 [02:09<20:46,  1.02s/it][A
Iteration:  10%|▉         | 129/1349 [02:10<20:40,  1.02s/it][A
Iteration:  10%|▉         | 130/1349 [02:11<21:12,  1.04s/it][A
Iteration:  10%|▉         | 131/1349 [02:12<20:54,  1.03s/it][A
Iteration:  10%|▉         | 132/1349 [02:13<20:44,  1.02s/it][A
Iteration:  10%|▉         | 133/1349 [02:15<20:38,  1.02s/it][A
Iteration:  10%|▉         | 134/1349 [02:15<20:28,  1.01s/it][A
Iteration:  10%|█         | 135/1349 [02:16<20:21,  1.01s/it][A
Iteration:  10%|█         | 136/1349 [02:17<20:20,  1.01s/it][A
Iteration:  10%|█         | 137/1349 [02:18<20:18,  1.01s/it][A
Iteration:  10%|█         | 138/1349 [02:19<20:14,  1.00s/it][A
Iteration:  10%|█         | 139/1349 [02:20<20:12,  1.00s/it][A
Iteration:  10%|█         | 140/1349 [02:21<20:12,  1.00s/it][A
Iteration:  10%|█         | 141/1349 [02:23<20:12,  1.00s/it][A
Iteration:  11%|█         | 142/1349 [02:24<20:12,  1.00s/it][A
Iteration:  11%|█        

Iteration:  19%|█▉        | 254/1349 [04:17<18:26,  1.01s/it][A
Iteration:  19%|█▉        | 255/1349 [04:18<18:25,  1.01s/it][A
Iteration:  19%|█▉        | 256/1349 [04:19<18:54,  1.04s/it][A
Iteration:  19%|█▉        | 257/1349 [04:20<18:41,  1.03s/it][A
Iteration:  19%|█▉        | 258/1349 [04:21<18:34,  1.02s/it][A
Iteration:  19%|█▉        | 259/1349 [04:22<18:29,  1.02s/it][A
Iteration:  19%|█▉        | 260/1349 [04:23<18:24,  1.01s/it][A
Iteration:  19%|█▉        | 261/1349 [04:24<18:18,  1.01s/it][A
Iteration:  19%|█▉        | 262/1349 [04:25<18:19,  1.01s/it][A
Iteration:  19%|█▉        | 263/1349 [04:26<18:17,  1.01s/it][A
Iteration:  20%|█▉        | 264/1349 [04:27<18:47,  1.04s/it][A
Iteration:  20%|█▉        | 265/1349 [04:28<18:34,  1.03s/it][A
Iteration:  20%|█▉        | 266/1349 [04:29<18:27,  1.02s/it][A
Iteration:  20%|█▉        | 267/1349 [04:30<18:22,  1.02s/it][A
Iteration:  20%|█▉        | 268/1349 [04:31<18:16,  1.01s/it][A
Iteration:  20%|█▉       

Iteration:  28%|██▊       | 380/1349 [06:24<16:18,  1.01s/it][A
Iteration:  28%|██▊       | 381/1349 [06:25<16:19,  1.01s/it][A
Iteration:  28%|██▊       | 382/1349 [06:26<16:17,  1.01s/it][A
Iteration:  28%|██▊       | 383/1349 [06:27<16:16,  1.01s/it][A
Iteration:  28%|██▊       | 384/1349 [06:28<16:15,  1.01s/it][A
Iteration:  29%|██▊       | 385/1349 [06:29<16:14,  1.01s/it][A
Iteration:  29%|██▊       | 386/1349 [06:30<16:13,  1.01s/it][A
Iteration:  29%|██▊       | 387/1349 [06:31<16:14,  1.01s/it][A
Iteration:  29%|██▉       | 388/1349 [06:32<16:13,  1.01s/it][A
Iteration:  29%|██▉       | 389/1349 [06:33<16:09,  1.01s/it][A
Iteration:  29%|██▉       | 390/1349 [06:34<16:33,  1.04s/it][A
Iteration:  29%|██▉       | 391/1349 [06:35<16:25,  1.03s/it][A
Iteration:  29%|██▉       | 392/1349 [06:36<16:17,  1.02s/it][A
Iteration:  29%|██▉       | 393/1349 [06:37<16:12,  1.02s/it][A
Iteration:  29%|██▉       | 394/1349 [06:38<16:10,  1.02s/it][A
Iteration:  29%|██▉      

Iteration:  38%|███▊      | 506/1349 [08:32<14:15,  1.01s/it][A
Iteration:  38%|███▊      | 507/1349 [08:33<14:08,  1.01s/it][A
Iteration:  38%|███▊      | 508/1349 [08:34<14:34,  1.04s/it][A
Iteration:  38%|███▊      | 509/1349 [08:35<14:25,  1.03s/it][A
Iteration:  38%|███▊      | 510/1349 [08:36<14:18,  1.02s/it][A
Iteration:  38%|███▊      | 511/1349 [08:37<14:13,  1.02s/it][A
Iteration:  38%|███▊      | 512/1349 [08:38<14:07,  1.01s/it][A
Iteration:  38%|███▊      | 513/1349 [08:39<14:07,  1.01s/it][A
Iteration:  38%|███▊      | 514/1349 [08:40<14:02,  1.01s/it][A
Iteration:  38%|███▊      | 515/1349 [08:41<14:21,  1.03s/it][A
Iteration:  38%|███▊      | 516/1349 [08:42<14:14,  1.03s/it][A
Iteration:  38%|███▊      | 517/1349 [08:43<14:08,  1.02s/it][A
Iteration:  38%|███▊      | 518/1349 [08:44<14:29,  1.05s/it][A
Iteration:  38%|███▊      | 519/1349 [08:45<14:18,  1.03s/it][A
Iteration:  39%|███▊      | 520/1349 [08:46<14:08,  1.02s/it][A
Iteration:  39%|███▊     

Iteration:  47%|████▋     | 632/1349 [10:40<11:59,  1.00s/it][A
Iteration:  47%|████▋     | 633/1349 [10:41<11:57,  1.00s/it][A
Iteration:  47%|████▋     | 634/1349 [10:42<11:57,  1.00s/it][A
Iteration:  47%|████▋     | 635/1349 [10:43<11:57,  1.01s/it][A
Iteration:  47%|████▋     | 636/1349 [10:44<11:56,  1.01s/it][A
Iteration:  47%|████▋     | 637/1349 [10:45<11:54,  1.00s/it][A
Iteration:  47%|████▋     | 638/1349 [10:46<11:54,  1.00s/it][A
Iteration:  47%|████▋     | 639/1349 [10:47<11:53,  1.00s/it][A
Iteration:  47%|████▋     | 640/1349 [10:48<11:53,  1.01s/it][A
Iteration:  48%|████▊     | 641/1349 [10:49<11:50,  1.00s/it][A
Iteration:  48%|████▊     | 642/1349 [10:50<11:48,  1.00s/it][A
Iteration:  48%|████▊     | 643/1349 [10:51<11:47,  1.00s/it][A
Iteration:  48%|████▊     | 644/1349 [10:52<12:07,  1.03s/it][A
Iteration:  48%|████▊     | 645/1349 [10:53<12:00,  1.02s/it][A
Iteration:  48%|████▊     | 646/1349 [10:54<11:53,  1.02s/it][A
Iteration:  48%|████▊    

Iteration:  56%|█████▌    | 758/1349 [12:47<10:11,  1.03s/it][A
Iteration:  56%|█████▋    | 759/1349 [12:48<10:04,  1.02s/it][A
Iteration:  56%|█████▋    | 760/1349 [12:49<09:59,  1.02s/it][A
Iteration:  56%|█████▋    | 761/1349 [12:50<09:54,  1.01s/it][A
Iteration:  56%|█████▋    | 762/1349 [12:51<09:53,  1.01s/it][A
Iteration:  57%|█████▋    | 763/1349 [12:52<09:51,  1.01s/it][A
Iteration:  57%|█████▋    | 764/1349 [12:53<09:50,  1.01s/it][A
Iteration:  57%|█████▋    | 765/1349 [12:54<09:47,  1.01s/it][A
Iteration:  57%|█████▋    | 766/1349 [12:55<10:03,  1.03s/it][A
Iteration:  57%|█████▋    | 767/1349 [12:56<09:57,  1.03s/it][A
Iteration:  57%|█████▋    | 768/1349 [12:57<09:53,  1.02s/it][A
Iteration:  57%|█████▋    | 769/1349 [12:58<09:49,  1.02s/it][A
Iteration:  57%|█████▋    | 770/1349 [12:59<09:46,  1.01s/it][A
Iteration:  57%|█████▋    | 771/1349 [13:00<10:00,  1.04s/it][A
Iteration:  57%|█████▋    | 772/1349 [13:01<09:51,  1.03s/it][A
Iteration:  57%|█████▋   

Iteration:  66%|██████▌   | 884/1349 [14:55<07:52,  1.02s/it][A
Iteration:  66%|██████▌   | 885/1349 [14:56<07:50,  1.01s/it][A
Iteration:  66%|██████▌   | 886/1349 [14:57<07:47,  1.01s/it][A
Iteration:  66%|██████▌   | 887/1349 [14:58<07:59,  1.04s/it][A
Iteration:  66%|██████▌   | 888/1349 [14:59<08:07,  1.06s/it][A
Iteration:  66%|██████▌   | 889/1349 [15:00<07:59,  1.04s/it][A
Iteration:  66%|██████▌   | 890/1349 [15:01<07:54,  1.03s/it][A
Iteration:  66%|██████▌   | 891/1349 [15:02<07:48,  1.02s/it][A
Iteration:  66%|██████▌   | 892/1349 [15:03<07:59,  1.05s/it][A
Iteration:  66%|██████▌   | 893/1349 [15:04<07:51,  1.03s/it][A
Iteration:  66%|██████▋   | 894/1349 [15:05<07:46,  1.03s/it][A
Iteration:  66%|██████▋   | 895/1349 [15:06<07:43,  1.02s/it][A
Iteration:  66%|██████▋   | 896/1349 [15:07<07:40,  1.02s/it][A
Iteration:  66%|██████▋   | 897/1349 [15:08<07:37,  1.01s/it][A
Iteration:  67%|██████▋   | 898/1349 [15:09<07:33,  1.01s/it][A
Iteration:  67%|██████▋  

Iteration:  75%|███████▍  | 1009/1349 [17:01<05:43,  1.01s/it][A
Iteration:  75%|███████▍  | 1010/1349 [17:02<05:42,  1.01s/it][A
Iteration:  75%|███████▍  | 1011/1349 [17:03<05:40,  1.01s/it][A
Iteration:  75%|███████▌  | 1012/1349 [17:04<05:50,  1.04s/it][A
Iteration:  75%|███████▌  | 1013/1349 [17:05<05:45,  1.03s/it][A
Iteration:  75%|███████▌  | 1014/1349 [17:06<05:42,  1.02s/it][A
Iteration:  75%|███████▌  | 1015/1349 [17:07<05:39,  1.02s/it][A
Iteration:  75%|███████▌  | 1016/1349 [17:08<05:38,  1.02s/it][A
Iteration:  75%|███████▌  | 1017/1349 [17:09<05:37,  1.02s/it][A
Iteration:  75%|███████▌  | 1018/1349 [17:10<05:35,  1.01s/it][A
Iteration:  76%|███████▌  | 1019/1349 [17:11<05:33,  1.01s/it][A
Iteration:  76%|███████▌  | 1020/1349 [17:12<05:31,  1.01s/it][A
Iteration:  76%|███████▌  | 1021/1349 [17:13<05:30,  1.01s/it][A
Iteration:  76%|███████▌  | 1022/1349 [17:14<05:29,  1.01s/it][A
Iteration:  76%|███████▌  | 1023/1349 [17:15<05:29,  1.01s/it][A
Iteration:

Iteration:  84%|████████▍ | 1133/1349 [19:07<03:39,  1.02s/it][A
Iteration:  84%|████████▍ | 1134/1349 [19:08<03:38,  1.01s/it][A
Iteration:  84%|████████▍ | 1135/1349 [19:09<03:36,  1.01s/it][A
Iteration:  84%|████████▍ | 1136/1349 [19:10<03:35,  1.01s/it][A
Iteration:  84%|████████▍ | 1137/1349 [19:11<03:34,  1.01s/it][A
Iteration:  84%|████████▍ | 1138/1349 [19:12<03:33,  1.01s/it][A
Iteration:  84%|████████▍ | 1139/1349 [19:13<03:31,  1.01s/it][A
Iteration:  85%|████████▍ | 1140/1349 [19:14<03:37,  1.04s/it][A
Iteration:  85%|████████▍ | 1141/1349 [19:15<03:33,  1.03s/it][A
Iteration:  85%|████████▍ | 1142/1349 [19:16<03:31,  1.02s/it][A
Iteration:  85%|████████▍ | 1143/1349 [19:17<03:29,  1.02s/it][A
Iteration:  85%|████████▍ | 1144/1349 [19:18<03:28,  1.02s/it][A
Iteration:  85%|████████▍ | 1145/1349 [19:19<03:26,  1.01s/it][A
Iteration:  85%|████████▍ | 1146/1349 [19:20<03:25,  1.01s/it][A
Iteration:  85%|████████▌ | 1147/1349 [19:21<03:24,  1.01s/it][A
Iteration:

Iteration:  93%|█████████▎| 1257/1349 [21:12<01:35,  1.04s/it][A
Iteration:  93%|█████████▎| 1258/1349 [21:13<01:33,  1.03s/it][A
Iteration:  93%|█████████▎| 1259/1349 [21:14<01:31,  1.02s/it][A
Iteration:  93%|█████████▎| 1260/1349 [21:15<01:30,  1.02s/it][A
Iteration:  93%|█████████▎| 1261/1349 [21:16<01:29,  1.01s/it][A
Iteration:  94%|█████████▎| 1262/1349 [21:17<01:28,  1.01s/it][A
Iteration:  94%|█████████▎| 1263/1349 [21:18<01:26,  1.01s/it][A
Iteration:  94%|█████████▎| 1264/1349 [21:19<01:25,  1.01s/it][A
Iteration:  94%|█████████▍| 1265/1349 [21:20<01:24,  1.01s/it][A
Iteration:  94%|█████████▍| 1266/1349 [21:21<01:23,  1.00s/it][A
Iteration:  94%|█████████▍| 1267/1349 [21:22<01:22,  1.01s/it][A
Iteration:  94%|█████████▍| 1268/1349 [21:24<01:21,  1.01s/it][A
Iteration:  94%|█████████▍| 1269/1349 [21:25<01:20,  1.00s/it][A
Iteration:  94%|█████████▍| 1270/1349 [21:26<01:19,  1.00s/it][A
Iteration:  94%|█████████▍| 1271/1349 [21:26<01:18,  1.00s/it][A
Iteration:

Train loss: 0.07615503477825997



Iteration:   0%|          | 1/1349 [00:01<22:37,  1.01s/it][A
Iteration:   0%|          | 2/1349 [00:02<22:35,  1.01s/it][A
Iteration:   0%|          | 3/1349 [00:03<22:35,  1.01s/it][A
Iteration:   0%|          | 4/1349 [00:04<22:31,  1.01s/it][A
Iteration:   0%|          | 5/1349 [00:05<22:29,  1.00s/it][A
Iteration:   0%|          | 6/1349 [00:06<22:28,  1.00s/it][A
Iteration:   1%|          | 7/1349 [00:07<22:25,  1.00s/it][A
Iteration:   1%|          | 8/1349 [00:08<22:25,  1.00s/it][A
Iteration:   1%|          | 9/1349 [00:09<22:23,  1.00s/it][A
Iteration:   1%|          | 10/1349 [00:10<22:20,  1.00s/it][A
Iteration:   1%|          | 11/1349 [00:11<22:21,  1.00s/it][A
Iteration:   1%|          | 12/1349 [00:12<22:24,  1.01s/it][A
Iteration:   1%|          | 13/1349 [00:13<22:21,  1.00s/it][A
Iteration:   1%|          | 14/1349 [00:14<22:25,  1.01s/it][A
Iteration:   1%|          | 15/1349 [00:15<22:25,  1.01s/it][A
Iteration:   1%|          | 16/1349 [00:16<22:22

Iteration:   9%|▉         | 128/1349 [02:09<21:05,  1.04s/it][A
Iteration:  10%|▉         | 129/1349 [02:10<20:54,  1.03s/it][A
Iteration:  10%|▉         | 130/1349 [02:11<20:40,  1.02s/it][A
Iteration:  10%|▉         | 131/1349 [02:12<20:36,  1.01s/it][A
Iteration:  10%|▉         | 132/1349 [02:13<20:29,  1.01s/it][A
Iteration:  10%|▉         | 133/1349 [02:14<20:28,  1.01s/it][A
Iteration:  10%|▉         | 134/1349 [02:15<20:25,  1.01s/it][A
Iteration:  10%|█         | 135/1349 [02:16<20:22,  1.01s/it][A
Iteration:  10%|█         | 136/1349 [02:17<20:26,  1.01s/it][A
Iteration:  10%|█         | 137/1349 [02:18<20:27,  1.01s/it][A
Iteration:  10%|█         | 138/1349 [02:19<20:27,  1.01s/it][A
Iteration:  10%|█         | 139/1349 [02:20<20:26,  1.01s/it][A
Iteration:  10%|█         | 140/1349 [02:21<20:27,  1.02s/it][A
Iteration:  10%|█         | 141/1349 [02:22<20:24,  1.01s/it][A
Iteration:  11%|█         | 142/1349 [02:23<20:55,  1.04s/it][A
Iteration:  11%|█        

Iteration:  19%|█▉        | 254/1349 [04:17<18:26,  1.01s/it][A
Iteration:  19%|█▉        | 255/1349 [04:18<18:22,  1.01s/it][A
Iteration:  19%|█▉        | 256/1349 [04:19<18:17,  1.00s/it][A
Iteration:  19%|█▉        | 257/1349 [04:20<18:16,  1.00s/it][A
Iteration:  19%|█▉        | 258/1349 [04:21<18:15,  1.00s/it][A
Iteration:  19%|█▉        | 259/1349 [04:22<18:12,  1.00s/it][A
Iteration:  19%|█▉        | 260/1349 [04:23<18:11,  1.00s/it][A
Iteration:  19%|█▉        | 261/1349 [04:24<18:10,  1.00s/it][A
Iteration:  19%|█▉        | 262/1349 [04:25<18:10,  1.00s/it][A
Iteration:  19%|█▉        | 263/1349 [04:26<18:08,  1.00s/it][A
Iteration:  20%|█▉        | 264/1349 [04:27<18:07,  1.00s/it][A
Iteration:  20%|█▉        | 265/1349 [04:28<18:05,  1.00s/it][A
Iteration:  20%|█▉        | 266/1349 [04:29<18:07,  1.00s/it][A
Iteration:  20%|█▉        | 267/1349 [04:30<18:07,  1.00s/it][A
Iteration:  20%|█▉        | 268/1349 [04:31<18:11,  1.01s/it][A
Iteration:  20%|█▉       

Iteration:  28%|██▊       | 380/1349 [06:24<16:16,  1.01s/it][A
Iteration:  28%|██▊       | 381/1349 [06:25<16:15,  1.01s/it][A
Iteration:  28%|██▊       | 382/1349 [06:26<16:17,  1.01s/it][A
Iteration:  28%|██▊       | 383/1349 [06:27<16:15,  1.01s/it][A
Iteration:  28%|██▊       | 384/1349 [06:28<16:15,  1.01s/it][A
Iteration:  29%|██▊       | 385/1349 [06:29<16:10,  1.01s/it][A
Iteration:  29%|██▊       | 386/1349 [06:30<16:14,  1.01s/it][A
Iteration:  29%|██▊       | 387/1349 [06:31<16:12,  1.01s/it][A
Iteration:  29%|██▉       | 388/1349 [06:32<16:10,  1.01s/it][A
Iteration:  29%|██▉       | 389/1349 [06:33<16:08,  1.01s/it][A
Iteration:  29%|██▉       | 390/1349 [06:34<16:08,  1.01s/it][A
Iteration:  29%|██▉       | 391/1349 [06:35<16:06,  1.01s/it][A
Iteration:  29%|██▉       | 392/1349 [06:36<16:04,  1.01s/it][A
Iteration:  29%|██▉       | 393/1349 [06:37<16:03,  1.01s/it][A
Iteration:  29%|██▉       | 394/1349 [06:38<16:05,  1.01s/it][A
Iteration:  29%|██▉      

Iteration:  38%|███▊      | 506/1349 [08:32<14:13,  1.01s/it][A
Iteration:  38%|███▊      | 507/1349 [08:33<14:12,  1.01s/it][A
Iteration:  38%|███▊      | 508/1349 [08:34<14:09,  1.01s/it][A
Iteration:  38%|███▊      | 509/1349 [08:35<14:09,  1.01s/it][A
Iteration:  38%|███▊      | 510/1349 [08:36<14:08,  1.01s/it][A
Iteration:  38%|███▊      | 511/1349 [08:37<14:08,  1.01s/it][A
Iteration:  38%|███▊      | 512/1349 [08:38<14:06,  1.01s/it][A
Iteration:  38%|███▊      | 513/1349 [08:39<14:04,  1.01s/it][A
Iteration:  38%|███▊      | 514/1349 [08:40<14:00,  1.01s/it][A
Iteration:  38%|███▊      | 515/1349 [08:41<13:57,  1.00s/it][A
Iteration:  38%|███▊      | 516/1349 [08:42<13:57,  1.01s/it][A
Iteration:  38%|███▊      | 517/1349 [08:43<13:55,  1.00s/it][A
Iteration:  38%|███▊      | 518/1349 [08:44<13:53,  1.00s/it][A
Iteration:  38%|███▊      | 519/1349 [08:45<13:53,  1.00s/it][A
Iteration:  39%|███▊      | 520/1349 [08:46<13:51,  1.00s/it][A
Iteration:  39%|███▊     

Iteration:  47%|████▋     | 632/1349 [10:40<12:00,  1.01s/it][A
Iteration:  47%|████▋     | 633/1349 [10:41<11:57,  1.00s/it][A
Iteration:  47%|████▋     | 634/1349 [10:42<11:59,  1.01s/it][A
Iteration:  47%|████▋     | 635/1349 [10:43<11:58,  1.01s/it][A
Iteration:  47%|████▋     | 636/1349 [10:44<11:56,  1.01s/it][A
Iteration:  47%|████▋     | 637/1349 [10:45<11:56,  1.01s/it][A
Iteration:  47%|████▋     | 638/1349 [10:46<11:56,  1.01s/it][A
Iteration:  47%|████▋     | 639/1349 [10:47<11:55,  1.01s/it][A
Iteration:  47%|████▋     | 640/1349 [10:48<12:15,  1.04s/it][A
Iteration:  48%|████▊     | 641/1349 [10:49<12:08,  1.03s/it][A
Iteration:  48%|████▊     | 642/1349 [10:50<12:01,  1.02s/it][A
Iteration:  48%|████▊     | 643/1349 [10:51<12:20,  1.05s/it][A
Iteration:  48%|████▊     | 644/1349 [10:52<12:11,  1.04s/it][A
Iteration:  48%|████▊     | 645/1349 [10:53<12:05,  1.03s/it][A
Iteration:  48%|████▊     | 646/1349 [10:54<11:58,  1.02s/it][A
Iteration:  48%|████▊    

Iteration:  56%|█████▌    | 758/1349 [12:47<09:56,  1.01s/it][A
Iteration:  56%|█████▋    | 759/1349 [12:48<09:55,  1.01s/it][A
Iteration:  56%|█████▋    | 760/1349 [12:49<09:54,  1.01s/it][A
Iteration:  56%|█████▋    | 761/1349 [12:50<09:53,  1.01s/it][A
Iteration:  56%|█████▋    | 762/1349 [12:51<09:53,  1.01s/it][A
Iteration:  57%|█████▋    | 763/1349 [12:52<09:52,  1.01s/it][A
Iteration:  57%|█████▋    | 764/1349 [12:53<09:50,  1.01s/it][A
Iteration:  57%|█████▋    | 765/1349 [12:54<10:05,  1.04s/it][A
Iteration:  57%|█████▋    | 766/1349 [12:55<09:59,  1.03s/it][A
Iteration:  57%|█████▋    | 767/1349 [12:56<09:54,  1.02s/it][A
Iteration:  57%|█████▋    | 768/1349 [12:58<09:53,  1.02s/it][A
Iteration:  57%|█████▋    | 769/1349 [12:59<09:49,  1.02s/it][A
Iteration:  57%|█████▋    | 770/1349 [13:00<09:45,  1.01s/it][A
Iteration:  57%|█████▋    | 771/1349 [13:01<09:44,  1.01s/it][A
Iteration:  57%|█████▋    | 772/1349 [13:02<09:44,  1.01s/it][A
Iteration:  57%|█████▋   

Iteration:  66%|██████▌   | 884/1349 [14:55<07:50,  1.01s/it][A
Iteration:  66%|██████▌   | 885/1349 [14:56<07:49,  1.01s/it][A
Iteration:  66%|██████▌   | 886/1349 [14:57<07:48,  1.01s/it][A
Iteration:  66%|██████▌   | 887/1349 [14:58<07:48,  1.01s/it][A
Iteration:  66%|██████▌   | 888/1349 [14:59<07:46,  1.01s/it][A
Iteration:  66%|██████▌   | 889/1349 [15:00<07:43,  1.01s/it][A
Iteration:  66%|██████▌   | 890/1349 [15:01<07:42,  1.01s/it][A
Iteration:  66%|██████▌   | 891/1349 [15:02<07:41,  1.01s/it][A
Iteration:  66%|██████▌   | 892/1349 [15:03<07:42,  1.01s/it][A
Iteration:  66%|██████▌   | 893/1349 [15:04<07:40,  1.01s/it][A
Iteration:  66%|██████▋   | 894/1349 [15:05<07:39,  1.01s/it][A
Iteration:  66%|██████▋   | 895/1349 [15:06<07:37,  1.01s/it][A
Iteration:  66%|██████▋   | 896/1349 [15:07<07:35,  1.00s/it][A
Iteration:  66%|██████▋   | 897/1349 [15:08<07:47,  1.03s/it][A
Iteration:  67%|██████▋   | 898/1349 [15:09<07:42,  1.03s/it][A
Iteration:  67%|██████▋  

Iteration:  75%|███████▍  | 1009/1349 [17:02<05:44,  1.01s/it][A
Iteration:  75%|███████▍  | 1010/1349 [17:03<05:43,  1.01s/it][A
Iteration:  75%|███████▍  | 1011/1349 [17:04<05:42,  1.01s/it][A
Iteration:  75%|███████▌  | 1012/1349 [17:05<05:41,  1.01s/it][A
Iteration:  75%|███████▌  | 1013/1349 [17:06<05:39,  1.01s/it][A
Iteration:  75%|███████▌  | 1014/1349 [17:07<05:38,  1.01s/it][A
Iteration:  75%|███████▌  | 1015/1349 [17:08<05:37,  1.01s/it][A
Iteration:  75%|███████▌  | 1016/1349 [17:09<05:36,  1.01s/it][A
Iteration:  75%|███████▌  | 1017/1349 [17:10<05:35,  1.01s/it][A
Iteration:  75%|███████▌  | 1018/1349 [17:11<05:34,  1.01s/it][A
Iteration:  76%|███████▌  | 1019/1349 [17:12<05:33,  1.01s/it][A
Iteration:  76%|███████▌  | 1020/1349 [17:13<05:31,  1.01s/it][A
Iteration:  76%|███████▌  | 1021/1349 [17:14<05:31,  1.01s/it][A
Iteration:  76%|███████▌  | 1022/1349 [17:15<05:30,  1.01s/it][A
Iteration:  76%|███████▌  | 1023/1349 [17:16<05:29,  1.01s/it][A
Iteration:

Iteration:  84%|████████▍ | 1133/1349 [19:07<03:37,  1.01s/it][A
Iteration:  84%|████████▍ | 1134/1349 [19:08<03:36,  1.01s/it][A
Iteration:  84%|████████▍ | 1135/1349 [19:09<03:35,  1.00s/it][A
Iteration:  84%|████████▍ | 1136/1349 [19:10<03:34,  1.01s/it][A
Iteration:  84%|████████▍ | 1137/1349 [19:11<03:33,  1.01s/it][A
Iteration:  84%|████████▍ | 1138/1349 [19:12<03:31,  1.00s/it][A
Iteration:  84%|████████▍ | 1139/1349 [19:13<03:31,  1.00s/it][A
Iteration:  85%|████████▍ | 1140/1349 [19:14<03:29,  1.00s/it][A
Iteration:  85%|████████▍ | 1141/1349 [19:15<03:28,  1.00s/it][A
Iteration:  85%|████████▍ | 1142/1349 [19:16<03:28,  1.01s/it][A
Iteration:  85%|████████▍ | 1143/1349 [19:17<03:27,  1.01s/it][A
Iteration:  85%|████████▍ | 1144/1349 [19:18<03:25,  1.00s/it][A
Iteration:  85%|████████▍ | 1145/1349 [19:19<03:24,  1.00s/it][A
Iteration:  85%|████████▍ | 1146/1349 [19:20<03:23,  1.00s/it][A
Iteration:  85%|████████▌ | 1147/1349 [19:21<03:23,  1.01s/it][A
Iteration:

Iteration:  93%|█████████▎| 1257/1349 [21:13<01:32,  1.01s/it][A
Iteration:  93%|█████████▎| 1258/1349 [21:14<01:34,  1.04s/it][A
Iteration:  93%|█████████▎| 1259/1349 [21:15<01:32,  1.03s/it][A
Iteration:  93%|█████████▎| 1260/1349 [21:16<01:31,  1.02s/it][A
Iteration:  93%|█████████▎| 1261/1349 [21:17<01:29,  1.02s/it][A
Iteration:  94%|█████████▎| 1262/1349 [21:18<01:28,  1.01s/it][A
Iteration:  94%|█████████▎| 1263/1349 [21:19<01:27,  1.01s/it][A
Iteration:  94%|█████████▎| 1264/1349 [21:20<01:25,  1.01s/it][A
Iteration:  94%|█████████▍| 1265/1349 [21:21<01:24,  1.01s/it][A
Iteration:  94%|█████████▍| 1266/1349 [21:22<01:25,  1.03s/it][A
Iteration:  94%|█████████▍| 1267/1349 [21:23<01:23,  1.02s/it][A
Iteration:  94%|█████████▍| 1268/1349 [21:24<01:22,  1.02s/it][A
Iteration:  94%|█████████▍| 1269/1349 [21:25<01:21,  1.02s/it][A
Iteration:  94%|█████████▍| 1270/1349 [21:26<01:19,  1.01s/it][A
Iteration:  94%|█████████▍| 1271/1349 [21:27<01:18,  1.01s/it][A
Iteration:

Train loss: 0.057512551753210794





## Evaluate Model

In [20]:
pred_tags, true_tags = token_classifier.predict(dev_features)

Validation loss: 0.09586639727155367


In [24]:
f1_score(pred_tags, true_tags)

0.4521074735617062