# Import required libraries
---

In [1]:
import os
import gzip
import itertools
import logging
import torch
from typing import List, Any
import torch.nn.functional as F
import numpy as np
from allennlp.modules import ConditionalRandomField
from allennlp.modules.conditional_random_field import allowed_transitions
from torch import nn
from torch.utils.data import DataLoader
from transformers import get_linear_schedule_with_warmup, AutoModel
from torch.utils.data import Dataset
from transformers import AutoTokenizer
from collections import defaultdict
from typing import Set
from overrides import overrides
from allennlp.training.metrics.metric import Metric
import argparse
import time
from pytorch_lightning import seed_everything
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, EarlyStopping
import warnings
from tqdm import tqdm
import random as rn

os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
conll_iob = {'B-ORG': 0, 'I-ORG': 1, 'B-MISC': 2, 'I-MISC': 3, 'B-LOC': 4, 
             'I-LOC': 5, 'B-PER': 6, 'I-PER': 7, 'O': 8}
wnut_iob = {'B-CORP': 0, 'I-CORP': 1, 'B-CW': 2, 'I-CW': 3, 'B-GRP': 4, 
            'I-GRP': 5, 'B-LOC': 6, 'I-LOC': 7, 'B-PER': 8, 'I-PER': 9, 
            'B-PROD': 10, 'I-PROD': 11, 'O': 12}
SEED = 42
rn.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Dataset Loading Functions and Class
---

In [3]:
def get_ner_reader(data):
    
    # 'fields' contains 4 lists 
    # The first list is the list of words present in the sentence
    # The last list is the list of ner tags of the words.
    
    fin = gzip.open(data, 'rt') if data.endswith('.gz') else open(data, 'rt')
    
    for is_divider, lines in itertools.groupby(fin, _is_divider):
        
        if is_divider:
            continue
        
        fields = [line.strip().split() for line in lines]
        fields = [list(field) for field in zip(*fields)]
        
        yield fields

# Function to assign the new tags 
def _assign_ner_tags(ner_tag, rep_):
    
    ner_tags_rep = []
    token_masks = []

    sub_token_len = len(rep_)
    token_masks.extend([True] * sub_token_len)
    
    if ner_tag[0] == 'B':
        
        in_tag = 'I' + ner_tag[1:]
        ner_tags_rep.append(ner_tag)
        ner_tags_rep.extend([in_tag] * (sub_token_len - 1))
    
    else:
        ner_tags_rep.extend([ner_tag] * sub_token_len)
    
    return ner_tags_rep, token_masks

# Function to extract spans (BI spans) and store in a dictionary
def extract_spans(tags):
    
    cur_tag = None
    cur_start = None
    gold_spans = {}

    def _save_span(_cur_tag, _cur_start, _cur_id, _gold_spans):
        
        if _cur_start is None:
            return _gold_spans
        
        _gold_spans[(_cur_start, _cur_id - 1)] = _cur_tag  # inclusive start & end, accord with conll-coref settings
        
        return _gold_spans

    # iterate over the tags
    for _id, nt in enumerate(tags):
        
        indicator = nt[0]
        
        if indicator == 'B':
            gold_spans = _save_span(cur_tag, cur_start, _id, gold_spans)
            cur_start = _id
            cur_tag = nt[2:]
            pass
        
        elif indicator == 'I':
            # do nothing
            pass
        
        elif indicator == 'O':
            gold_spans = _save_span(cur_tag, cur_start, _id, gold_spans)
            cur_tag = 'O'
            cur_start = _id
            pass
    
    _save_span(cur_tag, cur_start, _id + 1, gold_spans)
    
    return gold_spans


def _is_divider(line: str) -> bool:
    
    empty_line = line.strip() == ''
    
    if empty_line:
        return True

    first_token = line.split()[0]
    if first_token == "-DOCSTART-" or line.startswith('# id'):  # pylint: disable=simplifiable-if-statement
        return True

    return False

In [4]:
class CoNLLReader(Dataset):
    
    def __init__(self, max_instances = -1, max_length = 50, target_vocab = None, 
                 pretrained_dir = '', encoder_model = 'xlm-roberta-large'):
        
        self._max_instances = max_instances
        self._max_length = max_length

        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_dir + encoder_model)

        self.pad_token = self.tokenizer.special_tokens_map['pad_token']
        self.pad_token_id = self.tokenizer.get_vocab()['pad']
        self.sep_token = self.tokenizer.special_tokens_map['sep_token']

        self.label_to_id = {} if target_vocab is None else target_vocab
        self.instances = []

    def get_target_size(self):
        return len(set(self.label_to_id.values()))

    def get_target_vocab(self):
        return self.label_to_id

    def __len__(self):
        return len(self.instances)

    def __getitem__(self, item):
        return self.instances[item]

    def read_data(self, data):
        
        dataset_name = data if isinstance(data, str) else 'dataframe'

        print("Reading file {}".format(dataset_name))
        instance_idx = 0
        
        for fields in tqdm(get_ner_reader(data = data)):
            
            if self._max_instances != -1 and instance_idx > self._max_instances:
                break
            
            sentence_str, tokens_sub_rep, token_masks_rep, coded_ner_, gold_spans_ = self.parse_line_for_ner(fields = fields)
            
            tokens_tensor = torch.tensor(tokens_sub_rep, dtype = torch.long)
            tag_tensor = torch.tensor(coded_ner_, dtype = torch.long).unsqueeze(0)
            token_masks_rep = torch.tensor(token_masks_rep)

            self.instances.append((tokens_tensor, token_masks_rep, gold_spans_, tag_tensor))
            instance_idx += 1
                    
        print("Finished reading {:d} instances from file {}".format(len(self.instances), dataset_name))
    
    def parse_line_for_ner(self, fields):
        
        tokens_, ner_tags = fields[0], fields[-1]

        sentence_str, tokens_sub_rep, ner_tags_rep, token_masks_rep = self.parse_tokens_for_ner(tokens_, ner_tags)
        gold_spans_ = extract_spans(ner_tags_rep)
        coded_ner_ = [self.label_to_id[tag] for tag in ner_tags_rep]

        return sentence_str, tokens_sub_rep, token_masks_rep, coded_ner_, gold_spans_

    def parse_tokens_for_ner(self, tokens_, ner_tags):
        
        sentence_str = ''
        tokens_sub_rep, ner_tags_rep = [self.pad_token_id], ['O']
        
        for idx, token in enumerate(tokens_):
            
            if self._max_length != -1 and len(tokens_sub_rep) > self._max_length:
                break
            
            sentence_str += ' ' + ' '.join(self.tokenizer.tokenize(token.lower()))
            rep_ = self.tokenizer(token.lower())['input_ids']
            rep_ = rep_[1:-1]
            tokens_sub_rep.extend(rep_)

            # if we have a NER here, in the case of B, the first NER tag is the B tag, the rest are I tags.
            ner_tag = ner_tags[idx]
            tags, masks = _assign_ner_tags(ner_tag, rep_)
            ner_tags_rep.extend(tags)

        tokens_sub_rep.append(self.pad_token_id)
        ner_tags_rep.append('O')
        token_masks_rep = [True] * len(tokens_sub_rep)
        
        return sentence_str, tokens_sub_rep, ner_tags_rep, token_masks_rep

In [5]:
def get_tagset(tagging_scheme):
    if 'conll' in tagging_scheme:
        return conll_iob
    return wnut_iob

def get_reader(file_path, max_instances=-1, max_length=50, target_vocab=None, encoder_model='xlm-roberta-large'):
    if file_path is None:
        return None
    reader = CoNLLReader(max_instances=max_instances, max_length=max_length, target_vocab=target_vocab, encoder_model=encoder_model)
    reader.read_data(file_path)

    return reader

# Args Class
---

In [6]:
class Args():
    
    def __init__(self):
        
        self.train = '../input/semeval-task-11/EN-English/en_train.conll'
        self.test = '../input/semeval-task-11/EN-English/en_dev.conll'
        self.dev = '../input/semeval-task-11/EN-English/en_dev.conll'
        
        self.out_dir = './'
        self.iob_tagging = 'wnut'
        
        self.max_instances = -1
        self.max_length = 50
        
        # encoder_model options: xlm-roberta-base or ai4bharat/indic-bert or bert-base-multilingual-cased
        self.encoder_model = 'bert-base-multilingual-cased'
        self.model = './'
        self.model_name = 'bert-base-multilingual-cased'
        self.stage = 'fit'
        self.prefix = 'test'

        self.batch_size = 8
        self.gpus = 1
        self.device = 'cuda'
        self.epochs = 3
        self.lr = 1e-5
        self.dropout = 0.1
        self.max_grad_norm = 1.0

In [7]:
sg = Args()

# Metric
---

In [8]:
class SpanF1(Metric):
    
    def __init__(self, non_entity_labels = ['O']) -> None:
        
        self._num_gold_mentions = 0
        self._num_recalled_mentions = 0
        self._num_predicted_mentions = 0
        self._TP, self._FP, self._GT = defaultdict(int), defaultdict(int), defaultdict(int)
        self.non_entity_labels = set(non_entity_labels)

    @overrides
    def __call__(self, batched_predicted_spans, batched_gold_spans, sentences = None):
        
        non_entity_labels = self.non_entity_labels

        for predicted_spans, gold_spans in zip(batched_predicted_spans, batched_gold_spans):
            gold_spans_set = set([x for x, y in gold_spans.items() if y not in non_entity_labels])
            pred_spans_set = set([x for x, y in predicted_spans.items() if y not in non_entity_labels])

            self._num_gold_mentions += len(gold_spans_set)
            self._num_recalled_mentions += len(gold_spans_set & pred_spans_set)
            self._num_predicted_mentions += len(pred_spans_set)

            for ky, val in gold_spans.items():
                if val not in non_entity_labels:
                    self._GT[val] += 1

            for ky, val in predicted_spans.items():
                if val in non_entity_labels:
                    continue
                if ky in gold_spans and val == gold_spans[ky]:
                    self._TP[val] += 1
                else:
                    self._FP[val] += 1

    @overrides
    def get_metric(self, reset: bool = False) -> float:
        
        all_tags: Set[str] = set()
        all_tags.update(self._TP.keys())
        all_tags.update(self._FP.keys())
        all_tags.update(self._GT.keys())
        all_metrics = {}

        for tag in all_tags:
            precision, recall, f1_measure = self.compute_prf_metrics(true_positives=self._TP[tag],
                                                                     false_negatives=self._GT[tag] - self._TP[tag],
                                                                     false_positives=self._FP[tag])
            all_metrics['P@{}'.format(tag)] = precision
            all_metrics['R@{}'.format(tag)] = recall
            all_metrics['F1@{}'.format(tag)] = f1_measure

        # Compute the precision, recall and f1 for all spans jointly.
        precision, recall, f1_measure = self.compute_prf_metrics(true_positives=sum(self._TP.values()),
                                                                 false_positives=sum(self._FP.values()),
                                                                 false_negatives=sum(self._GT.values())-sum(self._TP.values()))
        all_metrics["micro@P"] = precision
        all_metrics["micro@R"] = recall
        all_metrics["micro@F1"] = f1_measure

        if self._num_gold_mentions == 0:
            entity_recall = 0.0
        else:
            entity_recall = self._num_recalled_mentions / float(self._num_gold_mentions)

        if self._num_predicted_mentions == 0:
            entity_precision = 0.0
        else:
            entity_precision = self._num_recalled_mentions / float(self._num_predicted_mentions)

        all_metrics['MD@R'] = entity_recall
        all_metrics['MD@P'] = entity_precision
        all_metrics['MD@F1'] = 2. * ((entity_precision * entity_recall) / (entity_precision + entity_recall + 1e-13))
        all_metrics['ALLTRUE'] = self._num_gold_mentions
        all_metrics['ALLRECALLED'] = self._num_recalled_mentions
        all_metrics['ALLPRED'] = self._num_predicted_mentions
        if reset:
            self.reset()
        return all_metrics

    @staticmethod
    def compute_prf_metrics(true_positives: int, false_positives: int, false_negatives: int):
        
        precision = float(true_positives) / float(true_positives + false_positives + 1e-13)
        recall = float(true_positives) / float(true_positives + false_negatives + 1e-13)
        f1_measure = 2. * ((precision * recall) / (precision + recall + 1e-13))
        return precision, recall, f1_measure

    @overrides
    def reset(self):
        
        self._num_gold_mentions = 0
        self._num_recalled_mentions = 0
        self._num_predicted_mentions = 0
        self._TP.clear()
        self._FP.clear()
        self._GT.clear()

# Model class
---

In [9]:
class NERModel(nn.Module):
    
    def __init__(self,
                 lr = 1e-5,
                 dropout_rate = 0.1,
                 batch_size = 16,
                 tag_to_id = None,
                 stage = 'fit',
                 pad_token_id = 1,
                 encoder_model = 'xlm-roberta-large',
                 num_gpus = 1):
        super(NERModel, self).__init__()

        self.id_to_tag = {v: k for k, v in tag_to_id.items()}
        self.tag_to_id = tag_to_id
        self.batch_size = batch_size

        self.stage = stage
        self.num_gpus = num_gpus
        self.target_size = len(self.id_to_tag)

        # set the default baseline model here
        self.pad_token_id = pad_token_id

        self.encoder_model = encoder_model
        self.encoder = AutoModel.from_pretrained(encoder_model, return_dict = True)

        self.feedforward = nn.Linear(in_features = self.encoder.config.hidden_size, 
                                     out_features = self.target_size)
        
        self.crf_layer = ConditionalRandomField(num_tags = self.target_size, 
                                                constraints = allowed_transitions(constraint_type = "BIO", 
                                                                                  labels = self.id_to_tag))

        self.lr = lr
        self.dropout = nn.Dropout(dropout_rate)

        self.span_f1 = SpanF1()        

    def forward(self, batch):
        
        tokens, tags, token_mask, metadata = batch
        batch_size = tokens.size(0)

        embedded_text_input = self.encoder(input_ids=tokens, attention_mask=token_mask)
        embedded_text_input = embedded_text_input.last_hidden_state
        embedded_text_input = self.dropout(F.leaky_relu(embedded_text_input))

        # project the token representation for classification
        token_scores = self.feedforward(embedded_text_input)

        # compute the log-likelihood loss and compute the best NER annotation sequence
        output = self._compute_token_tags(token_scores=token_scores, tags=tags, token_mask=token_mask, metadata=metadata, batch_size=batch_size)
        return output
    
    def _compute_token_tags(self, token_scores, tags, token_mask, metadata, batch_size):
        
        # compute the log-likelihood loss and compute the best NER annotation sequence
        loss = -self.crf_layer(token_scores, tags, token_mask) / float(batch_size)
        best_path = self.crf_layer.viterbi_tags(token_scores, token_mask)

        pred_results = []
        for i in range(batch_size):
            tag_seq, _ = best_path[i]
            pred_results.append(extract_spans([self.id_to_tag[x] for x in tag_seq if x in self.id_to_tag]))

        self.span_f1(pred_results, metadata)
        output = {"loss": loss, "results": self.span_f1.get_metric()}
        return output

# Function to load train and validation data
---

In [10]:
def dataloading():
    train_data = get_reader(file_path=sg.train, target_vocab=get_tagset(sg.iob_tagging), 
                            encoder_model=sg.encoder_model, max_instances=sg.max_instances,
                            max_length=sg.max_length)
    dev_data = get_reader(file_path=sg.dev, target_vocab=get_tagset(sg.iob_tagging), 
                          encoder_model=sg.encoder_model, max_instances=sg.max_instances, 
                          max_length=sg.max_length)

    return train_data, dev_data

In [11]:
train_data, dev_data = dataloading()

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/996k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Reading file ../input/semeval-task-11/EN-English/en_train.conll


15300it [00:30, 495.50it/s]


Finished reading 15300 instances from file ../input/semeval-task-11/EN-English/en_train.conll
Reading file ../input/semeval-task-11/EN-English/en_dev.conll


800it [00:01, 506.34it/s]

Finished reading 800 instances from file ../input/semeval-task-11/EN-English/en_dev.conll





# Prepare data into batches for training and evaluation
---

In [12]:
def collate_batch(batch):
        
        batch_ = list(zip(*batch))
        tokens, masks, gold_spans, tags = batch_[0], batch_[1], batch_[2], batch_[3]

        max_len = max([len(token) for token in tokens])
        token_tensor = torch.empty(size = (len(tokens), max_len), 
                                   dtype = torch.long).fill_(1)
        tag_tensor = torch.empty(size = (len(tokens), max_len), 
                                 dtype = torch.long).fill_(model.tag_to_id['O'])
        mask_tensor = torch.zeros(size = (len(tokens), max_len), dtype = torch.bool)

        for i in range(len(tokens)):
            
            tokens_ = tokens[i]
            seq_len = len(tokens_)

            token_tensor[i, :seq_len] = tokens_
            tag_tensor[i, :seq_len] = tags[i]
            mask_tensor[i, :seq_len] = masks[i]

        return token_tensor, tag_tensor, mask_tensor, gold_spans

In [13]:
def train_dataloader():
    loader = DataLoader(train_data, batch_size = sg.batch_size, collate_fn = collate_batch, num_workers = 1)
    return loader

def val_dataloader():
    loader = DataLoader(dev_data, batch_size = sg.batch_size, collate_fn = collate_batch, num_workers = 1)
    return loader

In [14]:
training_dataloader = train_dataloader()
validation_dataloader = val_dataloader()

In [15]:
model = NERModel(tag_to_id = train_data.get_target_vocab(), dropout_rate = sg.dropout, 
                 batch_size = sg.batch_size, stage = sg.stage, lr = sg.lr,
                         encoder_model = sg.encoder_model, num_gpus = sg.gpus)

Downloading:   0%|          | 0.00/714M [00:00<?, ?B/s]

In [16]:
model.to(sg.device)

NERModel(
  (encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)

In [17]:
optimizer = torch.optim.Adam(model.parameters(), lr=sg.lr)

# Training and Evaluation
---

In [18]:
def train_and_evaluate():
    
    print("----------------------- Training ----------------------------")
    print()
    
    # Training loop
    for epoch_i in tqdm(range(sg.epochs)):

        epoch_iterator = tqdm(training_dataloader, desc = "Iteration", position = 0, leave = True)

        # TRAIN loop
        model.train()
        training_loss = 0

        for step, batch in enumerate(epoch_iterator):
            #print(batch)
            batch = (batch[0].to(sg.device), batch[1].to(sg.device), batch[2].to(sg.device), batch[3])
            # forward pass
            output = model.forward(batch)

            # backward pass
            loss = output['loss']
            loss.backward()

            # track train loss
            training_loss += loss.item()

            # gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm = sg.max_grad_norm)

            # update parameters
            optimizer.step()

        # print train loss per epoch
        training_loss = training_loss / len(training_dataloader)
        print()
        print('Epoch: {} \tTraining Loss: {:.5f}'.format(epoch_i + 1, training_loss))
    
        metric_scores = model.span_f1.get_metric()
        model.span_f1.reset()
        
        print()
        print("Epoch: {} metrics".format(epoch_i+1))
        print()
        for key, value in metric_scores.items():
            print("{}: {:.5f},".format(key, value), end = " ")
        print()
    
    print()
    print("--------------------- Evaluation ---------------------")
    print()
    
    # Loop for evaluation on validation set
    
    epoch_iterator = tqdm(validation_dataloader, desc = "Iteration", position = 0, leave = True)
    
    validation_loss = 0
    for step, batch in enumerate(epoch_iterator):
    
        batch = (batch[0].to(sg.device), batch[1].to(sg.device), batch[2].to(sg.device), batch[3])

        with torch.no_grad():
            output = model.forward(batch)

        loss = output['loss']
        validation_loss += loss.item()

    validation_loss = validation_loss / len(validation_dataloader)
    print()
    print('Validation Loss: {:.5f}'.format(validation_loss))
    print()
    metric_scores = model.span_f1.get_metric()
    model.span_f1.reset()
    print()
    print("Metrics on validation set")
    print()
    for key, value in metric_scores.items():
        print("{}: {:.5f},".format(key, value), end = " ")
    print()
    print()
    torch.save(model, "./" + sg.model_name + "_" + str(sg.batch_size) + "_" + str(sg.lr) + ".pt")
    print("Saved the model")

In [19]:
train_and_evaluate()

----------------------- Training ----------------------------



Iteration: 100%|██████████| 1913/1913 [03:56<00:00,  8.08it/s]
 33%|███▎      | 1/3 [03:56<07:53, 236.81s/it]


Epoch: 1 	Training Loss: 7.89017

Epoch: 1 metrics

P@GRP: 0.55956, R@GRP: 0.58667, F1@GRP: 0.57280, P@CORP: 0.62784, R@CORP: 0.50595, F1@CORP: 0.56034, P@PER: 0.81113, R@PER: 0.85307, F1@PER: 0.83157, P@PROD: 0.50071, R@PROD: 0.35990, F1@PROD: 0.41879, P@LOC: 0.76736, R@LOC: 0.70931, F1@LOC: 0.73720, P@CW: 0.54586, R@CW: 0.46162, F1@CW: 0.50022, micro@P: 0.66835, micro@R: 0.61398, micro@F1: 0.64001, MD@R: 0.71137, MD@P: 0.77437, MD@F1: 0.74154, ALLTRUE: 23553.00000, ALLRECALLED: 16755.00000, ALLPRED: 21637.00000, 


Iteration: 100%|██████████| 1913/1913 [03:55<00:00,  8.12it/s]
 67%|██████▋   | 2/3 [07:52<03:56, 236.09s/it]


Epoch: 2 	Training Loss: 3.87876

Epoch: 2 metrics

P@GRP: 0.77601, R@GRP: 0.73929, F1@GRP: 0.75721, P@CORP: 0.72664, R@CORP: 0.73738, F1@CORP: 0.73197, P@PER: 0.91764, R@PER: 0.93515, F1@PER: 0.92631, P@PROD: 0.63630, R@PROD: 0.60691, F1@PROD: 0.62126, P@LOC: 0.85540, R@LOC: 0.84809, F1@LOC: 0.85173, P@CW: 0.66962, R@CW: 0.66498, F1@CW: 0.66729, micro@P: 0.78522, micro@R: 0.77782, micro@F1: 0.78150, MD@R: 0.84257, MD@P: 0.85059, MD@F1: 0.84656, ALLTRUE: 23553.00000, ALLRECALLED: 19845.00000, ALLPRED: 23331.00000, 


Iteration: 100%|██████████| 1913/1913 [03:54<00:00,  8.15it/s]
100%|██████████| 3/3 [11:47<00:00, 235.70s/it]



Epoch: 3 	Training Loss: 2.63973

Epoch: 3 metrics

P@GRP: 0.83285, R@GRP: 0.80650, F1@GRP: 0.81946, P@CORP: 0.79968, R@CORP: 0.81099, F1@CORP: 0.80530, P@PER: 0.94541, R@PER: 0.95627, F1@PER: 0.95081, P@PROD: 0.71511, R@PROD: 0.72049, F1@PROD: 0.71779, P@LOC: 0.89874, R@LOC: 0.89519, F1@LOC: 0.89696, P@CW: 0.76452, R@CW: 0.76146, F1@CW: 0.76299, micro@P: 0.84231, micro@R: 0.84163, micro@F1: 0.84197, MD@R: 0.89097, MD@P: 0.89169, MD@F1: 0.89133, ALLTRUE: 23553.00000, ALLRECALLED: 20985.00000, ALLPRED: 23534.00000, 

--------------------- Evaluation ---------------------



Iteration: 100%|██████████| 100/100 [00:03<00:00, 25.63it/s]



Validation Loss: 5.16857


Metrics on validation set

P@GRP: 0.79545, R@GRP: 0.73684, F1@GRP: 0.76503, P@CORP: 0.80124, R@CORP: 0.66839, F1@CORP: 0.72881, P@PER: 0.88925, R@PER: 0.94138, F1@PER: 0.91457, P@PROD: 0.65354, R@PROD: 0.56463, F1@PROD: 0.60584, P@LOC: 0.74170, R@LOC: 0.85897, F1@LOC: 0.79604, P@CW: 0.62570, R@CW: 0.63636, F1@CW: 0.63099, micro@P: 0.76822, micro@R: 0.76260, micro@F1: 0.76540, MD@R: 0.84065, MD@P: 0.84685, MD@F1: 0.84374, ALLTRUE: 1230.00000, ALLRECALLED: 1034.00000, ALLPRED: 1221.00000, 

Saved the model
