# BERT Relation Extraction

In this notebook, we implement the model on the SemEval-2010 Task 8 dataset.

## Importing Packages

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import random
import copy
import logging
import csv
import json
import numpy as np
import torch
from transformers import (
    BertConfig,
    BertTokenizer,
)
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from tqdm import tqdm, trange
from transformers import AdamW, get_linear_schedule_with_warmup
import re
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [3]:
rel2id = {'<pad>': 0,
 'Cause-Effect(e1,e2)': 1,
 'Cause-Effect(e2,e1)': 2,
 'Component-Whole(e1,e2)': 3,
 'Component-Whole(e2,e1)': 4,
 'Content-Container(e1,e2)': 5,
 'Content-Container(e2,e1)': 6,
 'Entity-Destination(e1,e2)': 7,
 'Entity-Destination(e2,e1)': 8,
 'Entity-Origin(e1,e2)': 9,
 'Entity-Origin(e2,e1)': 10,
 'Instrument-Agency(e1,e2)': 11,
 'Instrument-Agency(e2,e1)': 12,
 'Member-Collection(e1,e2)': 13,
 'Member-Collection(e2,e1)': 14,
 'Message-Topic(e1,e2)': 15,
 'Message-Topic(e2,e1)': 16,
 'Other': 17,
 'Product-Producer(e1,e2)': 18,
 'Product-Producer(e2,e1)': 19}

id2rel = {v: k for k, v in rel2id.items()}

### Preparing SemEval-2010 Task 8 Data

We start by loading the data.

In [4]:
class InputExample(object):

    def __init__(self, guid, text_a, label):
        self.guid = guid
        self.text_a = text_a
        self.label = label

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"


class InputFeatures(object):

    def __init__(
        self, input_ids, attention_mask, token_type_ids, label_id, e1_mask, e2_mask, pos1, pos2, mask, token
    ):
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.token_type_ids = token_type_ids
        self.label_id = label_id
        self.e1_mask = e1_mask
        self.e2_mask = e2_mask
        self.pos1 = pos1
        self.pos2 = pos2
        self.mask = mask
        self.token = token


    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"


class process_dataset(object):
    """Processor for the dataset """

    def __init__(self, args):
        self.args = args
        self.relation_labels = get_label(args)

    @classmethod
    def _read_tsv(cls, input_file, quotechar=None):
        with open(input_file, "r", encoding="utf-8") as f:
            reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
            lines = []
            for line in reader:
                lines.append(line)
            return lines

    def _create_examples(self, lines, set_type):
        examples = []
        for (i, line) in enumerate(lines):
            guid = "%s-%s" % (set_type, i)
            text_a = line[1]
            label = line[2]
            if i % 1000 == 0:
                logger.info(line)
                print(f"Creating Example {i}: {text_a}, Label: {label}")
            examples.append(InputExample(guid=guid, text_a=text_a, label=label))
        return examples

    def get_examples(self, mode):
        """
        Args:
            mode: train, dev, test
        """
        file_to_read = None
        if mode == "train":
            file_to_read = self.args.train_file
        elif mode == "dev":
            file_to_read = self.args.dev_file
        elif mode == "test":
            file_to_read = self.args.test_file

        logger.info(
            "LOOKING AT {}".format(os.path.join(self.args.data_dir, file_to_read))
        )
        return self._create_examples(
            self._read_tsv(os.path.join(self.args.data_dir, file_to_read)), mode
        )

processors = {"semeval": process_dataset}

### Word Embeddings

In [5]:
class WordEmbeddingLoader(object):

    def __init__(self, embedding_path, word_dim, min_freq):
        self.embedding_path = embedding_path
        self.embedding_dim = word_dim
        self.data_dir = '/content/drive/MyDrive/TextMiningCW/data_RE'
        self.min_freq = min_freq

    def sentence_process(self, sentence):
        e1 = re.findall(r'<e1>(.*)</e1>', sentence)[0]
        e2 = re.findall(r'<e2>(.*)</e2>', sentence)[0]
        sentence = sentence.replace('<e1>' + e1 + '</e1>', ' <e1> ' + e1 + ' </e1> ', 1)
        sentence = sentence.replace('<e2>' + e2 + '</e2>', ' <e2> ' + e2 + ' </e2> ', 1)
        sentence = ''.join(sentence)
        sentence = sentence.replace('< e1 >', '<e1>')
        sentence = sentence.replace('< e2 >', '<e2>')
        sentence = sentence.replace('< /e1 >', '</e1>')
        sentence = sentence.replace('< /e2 >', '</e2>')
        token = sentence.split()

        assert '<e1>' in sentence
        assert '<e2>' in sentence
        assert '</e1>' in sentence
        assert '</e2>' in sentence

        pure_token = []
        for i, word in enumerate(token):
            if '<e1>' == word:
                continue
            if '</e1>' == word:
                continue
            if '<e2>' in word:
                continue
            if '</e2>' in word:
                continue
            pure_token.append(word)

        return pure_token

    def __build_vocab(self):
        vocab = {}
        filename = 'train.tsv'
        with open(os.path.join(self.data_dir, filename), 'r', encoding='utf-8') as fr:
            for line in fr:
                parts = line.strip().split('\t')
                sentence = parts[1].split()
                sentence = self.sentence_process(' '.join(sentence))
                for token in sentence:
                    token = token.lower()
                    vocab[token] = vocab.get(token, 0) + 1
        vocab = set([token for token in vocab if vocab[token] > self.min_freq])
        return vocab

    def load_embedding(self):
        vocab = self.__build_vocab()
        token2id = {}
        token2id['PAD'] = len(token2id) #0
        token2id['UNK'] = len(token2id) #1
        token_emb = []
        with open(self.embedding_path, 'r', encoding='utf-8') as fr:
            for line in fr:
                line = line.strip().split()
                if len(line) != self.embedding_dim + 1:
                    continue
                if line[0] not in vocab:
                    continue
                token2id[line[0]] = len(token2id)
                token_emb.append(np.asarray(line[1:], dtype=np.float32))
        token_emb = np.stack(token_emb).reshape(-1, self.embedding_dim)

        # For special characters, the embeddings are randomly initialized using a normal distribution
        special_emb = np.random.uniform(-0.1, 0.1, size=(2, self.embedding_dim))

        token_emb = np.concatenate((special_emb, token_emb), axis=0)
        token_emb = token_emb.astype(np.float32).reshape(-1, self.embedding_dim)
        token_emb = torch.from_numpy(token_emb)
        return token2id, token_emb

In [6]:
EMBEDDING_PATH = '/content/drive/MyDrive/TextMiningCW/embeddings/glove.6B.300d.txt'
WORD_DIM = 300
MIN_FREQ = 1

word2id, word_vec = WordEmbeddingLoader(EMBEDDING_PATH, WORD_DIM, MIN_FREQ).load_embedding()

In [7]:
class RelationLoader(object):

    def __load_relation(self):
        relation_file = '/content/drive/MyDrive/TextMiningCW/data_RE/relation2id.txt'
        rel2id = {}
        id2rel = {}
        with open(relation_file, 'r', encoding='utf-8') as fr:
            for line in fr:
                relation, id_s = line.strip().split()
                id_d = int(id_s)
                rel2id[relation] = id_d
                id2rel[id_d] = relation
        return rel2id, id2rel, len(rel2id)

    def get_relation(self):
        return self.__load_relation()

In [8]:
rel2id, id2rel, class_num = RelationLoader().get_relation()

### Next, we aim to convert the examples into features. For this purpose, we use the following function which returns:
- `input_ids`: Indices of the input sequence tokens in the vocabulary.
- `attention_mask`: Mask to avoid attending to the padding token indices.

   Mask values are 0 or 1: 1 for tokens that are not masked, 0 for masked tokens (padding).
- `token_type_ids`: Segment token index to indicate the first and second part of the inputs.
- `label_id`: The index of the label.
- `e1_mask`: Mask that is 1 at the location of entity 1 and 0 elsewhere.
- `e2_mask`: Mask that is 1 at the location of entity 2 and 0 elsewhere.

In [9]:
 """
Converts examples to features for relation extraction task.

Args:
    examples (list): List of examples containing text and labels.
    max_seq_len (int): Maximum sequence length after tokenization.
    tokenizer (Tokenizer): Tokenizer to convert text to tokens.
    mode (str): Mode of operation ('train', 'dev', or 'test').
    cls_token (str, optional): CLS token (default: "[CLS]").
    cls_token_segment_id (int, optional): Segment ID for CLS token (default: 0).
    sep_token (str, optional): SEP token (default: "[SEP]").
    pad_token (int, optional): Padding token ID (default: 0).
    pad_token_segment_id (int, optional): Segment ID for padding token (default: 0).
    sequence_a_segment_id (int, optional): Segment ID for sequence A (default: 0).
    add_sep_token (bool, optional): Whether to add SEP token (default: False).
    mask_padding_with_zero (bool, optional): Whether to pad mask with zeros (default: True).

Returns:
    list: List of InputFeatures containing input_ids, attention_mask, token_type_ids, label_id, e1_mask, e2_mask, pos1, pos2, mask, token.
"""
def convert_examples_to_features(
    examples,
    max_seq_len,
    tokenizer,
    mode,
    cls_token="[CLS]",
    cls_token_segment_id=0,
    sep_token="[SEP]",
    pad_token=0,
    pad_token_segment_id=0,
    sequence_a_segment_id=0,
    add_sep_token=False,
    mask_padding_with_zero=True,
):
  features = []

  if mode == 'train':
    loader = train_loader
  elif mode == 'dev':
    loader = valid_loader
  elif mode == 'test':
    loader = test_loader

  for (batch_data,label),(ex_index, example) in zip(loader, enumerate(examples)):

        words = batch_data[0][0].tolist()
        pos1 = batch_data[0][1].tolist()
        pos2 = batch_data[0][2].tolist()
        mask = batch_data[0][3].tolist()
        e1_mask = batch_data[0][4].tolist()
        e2_mask = batch_data[0][5].tolist()

        if ex_index % 1000 == 0:
            logger.info("Writing example %d of %d" % (ex_index, len(examples)))
            print()

        tokens_a = tokenizer.tokenize(example.text_a)

        #if ex_index % 1000 == 0:
        #  print(f"Tokens for example {ex_index}: {tokens_a}")

        if "<e1>" not in tokens_a:
            break
        e11_p = tokens_a.index("<e1>")
        e12_p = tokens_a.index("</e1>")
        e21_p = tokens_a.index("<e2>")
        e22_p = tokens_a.index("</e2>")
        # Replace the token
        tokens_a[e11_p] = "$"
        tokens_a[e12_p] = "$"
        tokens_a[e21_p] = "#"
        tokens_a[e22_p] = "#"

        # Add 1 because of the [CLS] token
        e11_p += 1
        e12_p += 1
        e21_p += 1
        e22_p += 1

        # Take into account [CLS] and [SEP] with "- 2"8

        if add_sep_token:
            special_tokens_count = 2
        else:
            special_tokens_count = 1
        if len(tokens_a) > max_seq_len - special_tokens_count:
            tokens_a = tokens_a[: (max_seq_len - special_tokens_count)]

        tokens = tokens_a
        if add_sep_token:
            tokens += [sep_token]

        token_type_ids = [sequence_a_segment_id] * len(tokens)

        tokens = [cls_token] + tokens
        token_type_ids = [cls_token_segment_id] + token_type_ids

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

        padding_length = max_seq_len - len(input_ids)
        input_ids = input_ids + ([pad_token] * padding_length)
        attention_mask = attention_mask + (
            [0 if mask_padding_with_zero else 1] * padding_length
        )
        token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length)

        e1_mask = [0] * len(attention_mask)
        e2_mask = [0] * len(attention_mask)

        for i in range(e11_p, e12_p + 1):
            e1_mask[i] = 1
        for i in range(e21_p, e22_p + 1):
            e2_mask[i] = 1

        assert len(input_ids) == max_seq_len, "Error with input length {} vs {}".format(
            len(input_ids), max_seq_len
        )
        assert (
            len(attention_mask) == max_seq_len
        ), "Error with attention mask length {} vs {}".format(
            len(attention_mask), max_seq_len
        )
        assert (
            len(token_type_ids) == max_seq_len
        ), "Error with token type length {} vs {}".format(
            len(token_type_ids), max_seq_len
        )

        label_id = int(example.label)

        features.append(
            InputFeatures(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                label_id=label_id,
                e1_mask=e1_mask,
                e2_mask=e2_mask,
                pos1=pos1,
                pos2=pos2,
                mask=mask,
                token=words
            )
        )

  return features

In [10]:
def init_logger():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )

We use the tokenizer provided by BERT and the model "bert-base-uncased", to which we add the tags present in our data.

In [11]:
ADDITIONAL_SPECIAL_TOKENS = ["<e1>", "</e1>", "<e2>", "</e2>"]

def load_tokenizer(args):
    tokenizer = MODEL_CLASSES[args.model_type][2].from_pretrained(
        args.model_name_or_path
    )
    tokenizer.add_special_tokens(
        {"additional_special_tokens": ADDITIONAL_SPECIAL_TOKENS}
    )
    return tokenizer

### We can now load the preprocessed dataset.

In [12]:
def load_and_cache_examples(args, tokenizer, mode):
    processor = processors[args.task](args)
    print("Processor loaded successfully:", processor)

    cached_features_file = os.path.join(
        args.data_dir,
        "cached_{}_{}_{}_{}".format(
            mode,
            args.task,
            list(filter(None, args.model_name_or_path.split("/"))).pop(),
            args.max_seq_len,
        ),
    )

    print("Cached features file:", cached_features_file)
    if os.path.exists(cached_features_file):
        print("Loading features from cached file...")
        features = torch.load(cached_features_file)
        print("Features loaded successfully.")
    else:
        print("Cached features file not found. Generating features...")
        if mode == "train":
            examples = processor.get_examples("train")
        elif mode == "dev":
            examples = processor.get_examples("dev")
        elif mode == "test":
            examples = processor.get_examples("test")
        else:
            raise Exception("Only train, dev, test is possible")

        print("Converting examples to features...")
        features = convert_examples_to_features(
            examples, args.max_seq_len, tokenizer, mode, add_sep_token=args.add_sep_token,
        )
        print("Examples converted to features successfully.")
        print(features)
        torch.save(features, cached_features_file)

    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_attention_mask = torch.tensor(
        [f.attention_mask for f in features], dtype=torch.long
    )
    all_token_type_ids = torch.tensor(
        [f.token_type_ids for f in features], dtype=torch.long
    )
    all_e1_mask = torch.tensor(
        [f.e1_mask for f in features], dtype=torch.long
    )  # adding mask e1
    all_e2_mask = torch.tensor(
        [f.e2_mask for f in features], dtype=torch.long
    )  # adding mask e2
    all_pos1 = torch.tensor(
        [f.pos1 for f in features], dtype=torch.long
    )
    all_pos2 = torch.tensor(
        [f.pos2 for f in features], dtype=torch.long
    )
    all_mask = torch.tensor(
        [f.mask for f in features], dtype=torch.long
    )
    all_token =torch.tensor(
        [f.token for f in features], dtype=torch.long
    )

    all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)

    dataset = TensorDataset(
        all_input_ids,
        all_attention_mask,
        all_token_type_ids,
        all_label_ids,
        all_e1_mask,
        all_e2_mask,
        all_pos1,
        all_pos2,
        all_mask,
        all_token
    )
    return dataset

### Get data from .json files to implement the attention layer

In [13]:
class BrevetsDateset(Dataset):
    def __init__(self, filename, rel2id, word2id, max_len, pos_dis):
        self.filename = filename
        self.rel2id = rel2id
        self.word2id = word2id
        self.max_len = max_len
        self.pos_dis = pos_dis
        self.data_dir = "/content/drive/MyDrive/TextMiningCW/data_RE"
        self.dataset, self.label = self.__load_data()

    # Position des mots par rapport aux entités e1 et e2
    def __get_pos_index(self, x):
        if x < -self.pos_dis:
            return 0
        if x >= -self.pos_dis and x <= self.pos_dis:
            return x + self.pos_dis + 1
        if x > self.pos_dis:
            return 2 * self.pos_dis + 2

    def __get_relative_pos(self, x, entity_pos):
        if x < entity_pos[0]:
            return self.__get_pos_index(x-entity_pos[0])
        elif x > entity_pos[1]:
            return self.__get_pos_index(x-entity_pos[1])
        else:
            return self.__get_pos_index(0)

    def __symbolize_sentence(self, e1_pos, e2_pos, sentence):
        """
            Args:
                e1_pos (tuple) span of e1
                e2_pos (tuple) span of e2
                sentence (list)

        """
        # mask vaut 2 entre les deux entités, 3 après la dernière entité, et 1 avant la première
        mask = [1] * len(sentence)
        if e1_pos[0] < e2_pos[0]:
            for i in range(e1_pos[0], e2_pos[1]+1):
                mask[i] = 2
            for i in range(e2_pos[1]+1, len(sentence)):
                mask[i] = 3
        else:
            for i in range(e2_pos[0], e1_pos[1]+1):
                mask[i] = 2
            for i in range(e1_pos[1]+1, len(sentence)):
                mask[i] = 3
        length = min(self.max_len, len(sentence))

        # e1_mask, e2_mask valent 1 à l'endroit de la phrase où l'entité 1 (resp 2) est présente
        e1_mask = [0] * self.max_len
        e2_mask = [0] * self.max_len
        for i in range(e1_pos[0], e1_pos[1]+1):
            if i < length:
                e1_mask[i] = 1
        for i in range(e2_pos[0], e2_pos[1]+1):
            if i < length:
                e2_mask[i] = 1

        words = []
        pos1 = []
        pos2 = []

        mask = mask[:length]

        for i in range(length):
            words.append(self.word2id.get(sentence[i].lower(), self.word2id['UNK']))
            pos1.append(self.__get_relative_pos(i, e1_pos))
            pos2.append(self.__get_relative_pos(i, e2_pos))

        if length < self.max_len:
            for i in range(length, self.max_len):
                mask.append(0)  # 'PAD' mask vaut 0
                words.append(self.word2id['PAD'])

                pos1.append(self.__get_relative_pos(i, e1_pos))
                pos2.append(self.__get_relative_pos(i, e2_pos))

        unit = np.asarray([words, pos1, pos2, mask, e1_mask, e2_mask], dtype=np.int64)
        unit = np.reshape(unit, newshape=(1, 6, self.max_len))
        return unit

    def __load_data(self):
        path_data_file = os.path.join(self.data_dir, self.filename)
        data = []
        labels = []
        with open(path_data_file, 'r', encoding='utf-8') as fr:
            for line in fr:
                line = json.loads(line.strip())
                label = line['relation']
                sentence = line['token']
                e1_pos = (line['subj_start'], line['subj_end'])
                e2_pos = (line['obj_start'], line['obj_end'])
                label_idx = self.rel2id[label]

                one_sentence = self.__symbolize_sentence(e1_pos, e2_pos, sentence)
                data.append(one_sentence)
                labels.append(label_idx)

        return data, labels

    def __getitem__(self, index):
        data = self.dataset[index]
        label = self.label[index]
        return data, label

    def __len__(self):
        return len(self.label)

class BrevetsDataLoader(object):
    def __init__(self, rel2id, word2id, batch_size, max_len, pos_dis):
        self.rel2id = rel2id
        self.word2id = word2id
        self.batch_size = batch_size
        self.max_len = max_len
        self.pos_dis = pos_dis
    def __collate_fn(self, batch):
        data, label = zip(*batch)  # unzip the batch data
        data = list(data)
        label = list(label)
        data = torch.from_numpy(np.concatenate(data, axis=0))
        label = torch.from_numpy(np.asarray(label, dtype=np.int64))
        return data, label

    def __get_data(self, filename, shuffle=False):
        dataset = BrevetsDateset(filename, self.rel2id, self.word2id, self.max_len, self.pos_dis)
        loader = DataLoader(
            dataset=dataset,
            batch_size=1,
            shuffle=shuffle,
            num_workers=2,
            collate_fn=self.__collate_fn
        )
        return loader

    def get_train(self):
        return self.__get_data('train.json', shuffle=True)

    def get_dev(self):
        return self.__get_data('valid.json', shuffle=False)

    def get_test(self):
        return self.__get_data('test.json', shuffle=False)

In [14]:
REL2ID = rel2id
WORD2ID = word2id
BATCH_SIZE = 1
MAX_LEN = 500
POS_DIS = 50

loader = BrevetsDataLoader(REL2ID, WORD2ID, BATCH_SIZE, MAX_LEN, POS_DIS)

test_loader = loader.get_test()
train_loader = loader.get_train()
valid_loader = loader.get_dev()

### The method described in the paper is as follows:

1. Take three vectors from BERT:
   - The token vector starting with [CLS]
   - The averaged vector of entity_1
   - The averaged vector of entity_2
2. Pass each vector through fully connected layers:
   - dropout -> tanh -> fc-layer
3. Concatenate the three vectors.
4. Pass the concatenated vector through a fully connected layer:
   - dropout -> fc-layer

In [15]:
import torch
import torch.nn as nn
from transformers import (BertModel, BertPreTrainedModel)
from torch.nn import init

PRETRAINED_MODEL_MAP = {
    'bert': BertModel,
}

class FCLayer(nn.Module):
    """
    Fully connected layer with optional activation function and dropout.

    Args:
        input_dim (int): Dimensionality of the input features.
        output_dim (int): Dimensionality of the output features.
        dropout_rate (float, optional): Dropout rate to apply (default: 0.).
        use_activation (bool, optional): Whether to apply activation function (default: True).
    """
    def __init__(self, input_dim, output_dim, dropout_rate=0., use_activation=True):
        super(FCLayer, self).__init__()
        self.use_activation = use_activation
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, output_dim)
        self.tanh = nn.Tanh()

    def forward(self, x):
        """
        Forward pass of the fully connected layer.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Output tensor after applying dropout and optional activation.
        """
        x = self.dropout(x)
        if self.use_activation:
            x = self.tanh(x)
        return self.linear(x)


class RBERT(BertPreTrainedModel):
    """
    RBERT (Relation-aware BERT) model for relation extraction tasks.

    Inherits from BertPreTrainedModel and extends it with additional layers and functionality
    tailored for relation extraction tasks, such as attention mechanisms and entity averaging.
    """
    def __init__(self, config, args, word_vec, pos_dis, pos_dim):
        super(RBERT, self).__init__(config)
        self.bert = PRETRAINED_MODEL_MAP[args.model_type](config=config)  # Load pretrained bert

        self.num_labels = config.num_labels

        self.filter_num = 100

        self.class_num = class_num

        self.cls_fc_layer = FCLayer(config.hidden_size, config.hidden_size, args.dropout_rate)
        self.e1_fc_layer = FCLayer(config.hidden_size, config.hidden_size, args.dropout_rate)
        self.e2_fc_layer = FCLayer(config.hidden_size, config.hidden_size, args.dropout_rate)
        self.label_classifier = FCLayer(config.hidden_size * 3, config.num_labels, args.dropout_rate, use_activation=False)

        self.word_vec = word_vec
        self.pos_dis = 50
        self.pos_dim = 4
        self.max_len = 384
        self.word_dim = 760
        self.dim = self.word_dim + 2 * self.pos_dim

        self.we = nn.Linear(
            in_features=self.dim * 2,
            out_features=self.dim* 2,
            bias=True
        )

        self.wa = nn.Linear(
            in_features=(self.dim)*2,
            out_features=1,
            bias=True
        )

        self.dense = nn.Linear(
            in_features=self.filter_num + 2 * self.dim,
            out_features=self.class_num,
            bias=True
        )

        self.word_embedding = nn.Embedding.from_pretrained(
            embeddings=self.word_vec,
            freeze=False,
        )
        self.pos1_embedding = nn.Embedding(
            num_embeddings=2 * self.pos_dis + 3,
            embedding_dim=self.pos_dim
        )
        self.pos2_embedding = nn.Embedding(
            num_embeddings=2 * self.pos_dis + 3,
            embedding_dim=self.pos_dim
        )

        self.tanh = nn.Tanh()

        self.wa = nn.Linear(
            in_features=self.dim*2,
            out_features=1,
            bias=True
        )

        self.dense = nn.Linear(
            in_features=self.filter_num + 2 * self.dim,
            out_features=self.class_num,
            bias=True
        )

        init.uniform_(self.pos1_embedding.weight, a=-0.1, b=0.1)
        init.uniform_(self.pos2_embedding.weight, a=-0.1, b=0.1)
        init.uniform_(self.we.weight, a=-0.1, b=0.1)
        init.constant_(self.we.bias, 0.)
        init.uniform_(self.wa.weight, a=-0.1, b=0.1)
        init.constant_(self.wa.bias, 0.)
        init.uniform_(self.dense.weight, a=-0.1, b=0.1)
        init.constant_(self.dense.bias, 0.)

    @staticmethod
    #This function computes the average vector representation of entities in the hidden output sequence based on entity masks.
    def entity_average(hidden_output, e_mask):
        e_mask_unsqueeze = e_mask.unsqueeze(1)  # [b, 1, j-i+1]
        length_tensor = (e_mask != 0).sum(dim=1).unsqueeze(1)  # [batch_size, 1]
        sum_vector = torch.bmm(e_mask_unsqueeze.float(), hidden_output).squeeze(1)  # [b, 1, j-i+1] * [b, j-i+1, dim] = [b, 1, dim] -> [b, dim]
        avg_vector = sum_vector.float() / length_tensor.float()
        return avg_vector

    def encoder_layer(self, token, pos1, pos2):
        # Embed the input tokens using the word embedding layer
        word_emb = self.word_embedding(token[:,:384])

        # Embed the position 1 markers using the position 1 embedding layer
        pos1_emb = self.pos1_embedding(pos1[:,:384])

        # Embed the position 2 markers using the position 2 embedding layer
        pos2_emb = self.pos2_embedding(pos2[:,:384])

        # Concatenate the word embeddings with zero vectors along the last dimension
        # to incorporate positional information
        word_emb = torch.cat([word_emb, torch.zeros(word_emb.size(0), 384, 460).to(token.device)], dim=-1)

        # Concatenate the word embeddings, position 1 embeddings, and position 2 embeddings
        # along the last dimension to create the final input embeddings
        emb = torch.cat(tensors=[word_emb, pos1_emb, pos2_emb], dim=-1)

        # emb : [BATCH_SIZE x MAX_LEN x (WORD_dim + 2*POS_DIM)]
        return emb


    def attention_layer(self, emb, entity, mask):
        # Expand the entity tensor to match the shape of the input embeddings (emb)
        entity = entity.unsqueeze(dim=1).expand(-1, self.max_len, -1)

        # Concatenate the input embeddings (emb) and the expanded entity tensor along the last dimension
        h = torch.cat(tensors=[emb, entity], dim=-1)

        # Flatten the concatenated tensor to prepare for linear transformation
        h_flat = h.view(-1, 2*self.dim)

        # Apply a linear transformation followed by a tanh activation function
        output = self.tanh(self.we(h_flat))

        # Apply another linear transformation to obtain attention scores for each position
        u_flat = self.wa(output)

        # Reshape the attention scores to match the shape of the input embeddings
        u = u_flat.view(-1, self.max_len)

        # Mask the attention scores to ignore padding tokens
        att_score = u.masked_fill(mask[:,:384].eq(0), float('-inf'))

        # Apply softmax to obtain attention weights
        att_weight = F.softmax(att_score, dim=-1).unsqueeze(dim=-1)

        # Apply attention weights to the input embeddings to compute the attended representation
        reps = torch.bmm(emb.transpose(1, 2), att_weight).squeeze(dim=-1)

        # reps : [BATCH_SIZE x (WORD_dim + 2*POS_DIM)]
        return reps

    def forward(self, input_ids, attention_mask, token_type_ids, labels, e1_mask, e2_mask, pos1, pos2, mask, token, eval=False):

      outputs = self.bert(input_ids, attention_mask=attention_mask,
                        token_type_ids=token_type_ids)  # sequence_output, pooled_output, (hidden_states), (attentions)
      sequence_output = outputs[0]
      pooled_output = outputs[1]  # [CLS]

      emb = self.encoder_layer(token, pos1, pos2)

      # Average
      e1_h = self.entity_average(sequence_output, e1_mask)
      e2_h = self.entity_average(sequence_output, e2_mask)

      e1_h = self.attention_layer(emb, e1_h, mask)
      e2_h = self.attention_layer(emb, e2_h, mask)

      # Dropout -> tanh -> fc_layer
      pooled_output = self.cls_fc_layer(pooled_output)
      e1_h = self.e1_fc_layer(e1_h)
      e2_h = self.e2_fc_layer(e2_h)

      # Concat -> fc_layer
      concat_h = torch.cat([pooled_output, e1_h, e2_h], dim=-1)
      logits = self.label_classifier(concat_h)

      outputs = (logits,) + outputs[2:]

      # Softmax
      if labels is not None:
        if self.num_labels == 1:
            loss_fct = nn.MSELoss()
            loss = loss_fct(logits.view(-1), labels.view(-1))
        else:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        outputs = (loss,) + outputs

      return outputs  # (loss), logits, (hidden_states), (attentions)

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### The following functions enable writing predictions and calculating the F1 score metric.

In [17]:
from sklearn.metrics import f1_score
def get_label(args):
    return [
        label.strip()
        for label in open(
            os.path.join(args.data_dir, args.label_file), "r", encoding="utf-8"
        )
    ]

def write_prediction(args, output_file, preds):
    relation_labels = get_label(args)
    with open(output_file, "w", encoding="utf-8") as f:
        for idx, pred in enumerate(preds):
            f.write("{}\t{}\n".format(8001 + idx, relation_labels[pred]))

def compute_metrics(preds, labels):
    print(f'Inside compute metrics: len(preds): {len(preds)}')
    print(f'Inside compute metrics: len(labels): {len(labels)}')
    assert len(preds) == len(labels)
    return acc_and_f1(preds, labels)

MODEL_CLASSES = {
    "bert": (BertConfig, RBERT, BertTokenizer),
}
MODEL_PATH_MAP = {
    "bert": "bert-base-uncased",
}

def simple_accuracy(preds, labels):
    return (preds == labels).mean()


def acc_and_f1(preds, labels, average="macro"):
    acc = simple_accuracy(preds, labels)
    f1 = f1_score(preds, labels, average="micro")
    return {
        "acc": acc,
        "f1": f1,
    }

## Functions for Training and Evaluation

In [27]:
import logging
logger = logging.getLogger(__name__)
class Trainer(object):
    def __init__(self, args, train_dataset=None, dev_dataset=None, test_dataset=None):
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.label_lst = get_label(args)

        self.num_labels = len(self.label_lst)

        self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]
        self.config = self.config_class.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task,
        )
        self.model = self.model_class.from_pretrained(
            args.model_name_or_path, config=self.config, args=args,word_vec=word_vec, pos_dis=50, pos_dim=300
        )

        # GPU or CPU
        self.device = (
            #"cpu"
            "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        )
        self.model.to(self.device)

    def train(self):
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(
            self.train_dataset,
            sampler=train_sampler,
            batch_size=self.args.train_batch_size,
        )

        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            self.args.num_train_epochs = (
                self.args.max_steps
                // (len(train_dataloader) // self.args.gradient_accumulation_steps)
                + 1
            )
        else:
            t_total = (
                len(train_dataloader)
                // self.args.gradient_accumulation_steps
                * self.args.num_train_epochs
            )

        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p
                    for n, p in self.model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": self.args.weight_decay,
            },
            {
                "params": [
                    p
                    for n, p in self.model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=self.args.learning_rate,
            eps=self.args.adam_epsilon,
        )
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.args.warmup_steps,
            num_training_steps=t_total,
        )

        global_step = 0
        tr_loss = 0.0
        self.model.zero_grad()

        train_iterator = trange(int(self.args.num_train_epochs), desc="Epoch")
        loss_train = []
        for _ in train_iterator:
            epoch_iterator = tqdm(train_dataloader, desc="Iteration")
            for step, batch in enumerate(epoch_iterator):
                self.model.train()
                batch = tuple(t.to(self.device) for t in batch)  # GPU or CPU

                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": batch[3],
                    "e1_mask": batch[4],
                    "e2_mask": batch[5],
                    "pos1" : batch[6],
                    "pos2" : batch[7],
                    "mask" : batch[8],
                    "token" : batch[9]
                }

                outputs = self.model(**inputs)
                loss = outputs[0]


                if self.args.gradient_accumulation_steps > 1:
                    loss = loss / self.args.gradient_accumulation_steps

                loss.backward()
                loss_train.append(loss.item())
                tr_loss += loss.item()
                if (step + 1) % self.args.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(
                        self.model.parameters(), self.args.max_grad_norm
                    )

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    self.model.zero_grad()
                    global_step += 1

                    if (
                        self.args.logging_steps > 0
                        and global_step % self.args.logging_steps == 0
                    ):
                        self.evaluate("test")

                    if (
                        self.args.save_steps > 0
                        and global_step % self.args.save_steps == 0
                    ):
                        self.save_model()

                if 0 < self.args.max_steps < global_step:
                    epoch_iterator.close()
                    break

            if 0 < self.args.max_steps < global_step:
                train_iterator.close()
                break

        return global_step, tr_loss / global_step, loss_train

    def evaluate(self, mode):
        # We use test dataset because semeval doesn't have dev dataset
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(
            dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size
        )

        # Eval!
        print(f"***** Running evaluation on {mode} dataset *****")
        print("  Num examples = %d", len(dataset))
        print("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()
        loss_eval = []
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": batch[3],
                    "e1_mask": batch[4],
                    "e2_mask": batch[5],
                    "pos1" : batch[6],
                    "pos2" : batch[7],
                    "mask" : batch[8],
                    "token" : batch[9],
                    "eval" : True
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
                loss_eval.append(tmp_eval_loss.mean().item())
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0
                )


        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        write_prediction(
            self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds
        )

        print(f'preds len: {np.unique(preds)}')
        print(f'out_label_ids len: {np.unique(out_label_ids)}')

        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        print("***** Eval results *****")
        for key in sorted(results.keys()):
            print("  {} = {:.4f}".format(key, results[key]))

        return results, loss_eval, preds, out_label_ids

    def save_model(self):
        # Save model checkpoint (Overwrite)
        if not os.path.exists(self.args.model_dir):
            os.makedirs(self.args.model_dir)
        model_to_save = (
            self.model.module if hasattr(self.model, "module") else self.model
        )
        model_to_save.save_pretrained(self.args.model_dir)

        # Save training arguments together with the trained model
        torch.save(self.args, os.path.join(self.args.model_dir, "training_args.bin"))
        logger.info("Saving model checkpoint to %s", self.args.model_dir)

    def load_model(self):
        # Check whether model exists
        if not os.path.exists(self.args.model_dir):
          raise Exception("Model doesn't exists! Train first!")

        try:
          self.args = torch.load(
               os.path.join(self.args.model_dir, "training_args.bin")
            )
          self.config = self.config_class.from_pretrained(self.args.model_dir)
          # Provide required arguments when loading the model
          self.model = self.model_class.from_pretrained(
          self.args.model_dir,
          config=self.config,
          args=self.args,
          word_vec=word_vec,  # Provide the 'word_vec' argument
          pos_dis=50,         # Provide the 'pos_dis' argument
          pos_dim=300         # Provide the 'pos_dim' argument
          )
          self.model.to(self.device)
          logger.info("***** Model Loaded *****")
        except:
          raise Exception("Some model files might be missing...")


## Parameters

In [19]:
import argparse
import logging

parser = argparse.ArgumentParser()

parser.add_argument(
    "--task", default="semeval", type=str, help="The name of the task to train"
)
parser.add_argument(
    "--data_dir",
    default="/content/drive/MyDrive/TextMiningCW/data_RE/",
    type=str,
    help="The input data dir. Should contain the .tsv files (or other data files) for the task.",
)
parser.add_argument(
    "--model_dir", default="/content/drive/MyDrive/TextMiningCW/model/", type=str, help="Path to model"
)
parser.add_argument(
    "--eval_dir",
    default="/content/drive/MyDrive/TextMiningCW/eval/",
    type=str,
    help="Evaluation script, result directory",
)
parser.add_argument(
    "--train_file", default="train.tsv", type=str, help="Train file"
)
parser.add_argument("--test_file", default="test.tsv", type=str, help="Test file")
parser.add_argument(
    "--label_file", default="relation2id.txt", type=str, help="Label file"
)

parser.add_argument(
    "--model_type",
    default="bert",
    type=str,
    help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()),
)

parser.add_argument(
    "--seed", type=int, default=42, help="random seed for initialization"
)
parser.add_argument(
    "--train_batch_size", default=16, type=int, help="Batch size for training."
)
parser.add_argument(
    "--eval_batch_size", default=32, type=int, help="Batch size for evaluation."
)
parser.add_argument(
    "--max_seq_len",
    default=384,
    type=int,
    help="The maximum total input sequence length after tokenization.",
)
parser.add_argument(
    "--learning_rate",
    default=0.0001,
    type=float,
    help="The initial learning rate for Adam.",
)
parser.add_argument(
    "--num_train_epochs",
    default=15.0,
    type=float,
    help="Total number of training epochs to perform.",
)
parser.add_argument(
    "--weight_decay", default=0.0, type=float, help="Weight decay if we apply some."
)
parser.add_argument(
    "--gradient_accumulation_steps",
    type=int,
    default=1,
    help="Number of updates steps to accumulate before performing a backward/update pass.",
)
parser.add_argument(
    "--adam_epsilon", default=0.00001, type=float, help="Epsilon for Adam optimizer."
)
parser.add_argument(
    "--max_grad_norm", default=1.0, type=float, help="Max gradient norm."
)
parser.add_argument(
    "--max_steps",
    default=-1,
    type=int,
    help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
)
parser.add_argument(
    "--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps."
)
parser.add_argument(
    "--dropout_rate",
    default=0.1,
    type=float,
    help="Dropout for fully-connected layers",
)

parser.add_argument(
    "--logging_steps", type=int, default=250, help="Log every X updates steps."
)
parser.add_argument(
    "--save_steps",
    type=int,
    default=250,
    help="Save checkpoint every X updates steps.",
)

parser.add_argument(
    "--do_train", action="store_true", help="Whether to run training."
)
parser.add_argument(
    "--do_eval", action="store_true", help="Whether to run eval on the test set."
)
parser.add_argument(
    "--no_cuda", action="store_true", help="Avoid using CUDA when available"
)
parser.add_argument(
    "--add_sep_token",
    action="store_true",
    help="Add [SEP] token at the end of the sentence",
)

args = parser.parse_args("")


args.model_name_or_path = MODEL_PATH_MAP[args.model_type]

In [20]:
tokenizer = load_tokenizer(args)

train_dataset = load_and_cache_examples(args, tokenizer, mode="train")
test_dataset = load_and_cache_examples(args, tokenizer, mode="test")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Processor loaded successfully: <__main__.process_dataset object at 0x7ee1b9c4b520>
Cached features file: /content/drive/MyDrive/TextMiningCW/data_RE/cached_train_semeval_bert-base-uncased_384
Loading features from cached file...
Features loaded successfully.
Processor loaded successfully: <__main__.process_dataset object at 0x7ee1b9c290f0>
Cached features file: /content/drive/MyDrive/TextMiningCW/data_RE/cached_test_semeval_bert-base-uncased_384
Loading features from cached file...
Features loaded successfully.


## Training

In [None]:
len(train_dataset)

8000

In [None]:
len(test_dataset)

2717

In [None]:
import time

start_time = time.time()

trainer = Trainer(args, train_dataset=train_dataset, test_dataset=test_dataset)
args.do_train = True
if args.do_train:
    _, _, train_loss = trainer.train()
    trainer.save_model()

end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")

Some weights of RBERT were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['cls_fc_layer.linear.bias', 'cls_fc_layer.linear.weight', 'dense.bias', 'dense.weight', 'e1_fc_layer.linear.bias', 'e1_fc_layer.linear.weight', 'e2_fc_layer.linear.bias', 'e2_fc_layer.linear.weight', 'label_classifier.linear.bias', 'label_classifier.linear.weight', 'pos1_embedding.weight', 'pos2_embedding.weight', 'wa.bias', 'wa.weight', 'we.bias', 'we.weight', 'word_embedding.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:   0%|          | 0/15 [00:00<?, ?it/s]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<08:46,  1.05s/it][A
Iteration:   0%|          | 2/500 [00:02<08:26,  1.02s/it][A
Iteration:   1%|          | 3/500 [00:03<08:19,  1.01s/it][A
Iteration:   1%|          | 4/500 [00:04<08:16,  1.00s/it][A
Iteration:   1%|          | 5/500 

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.30it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  4  6  7  8  9 10 11 12 13 16 17 18 19]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.5374
  f1 = 0.5374
  loss = 1.4878



Iteration:  50%|█████     | 250/500 [05:33<1:29:09, 21.40s/it][A
Iteration:  50%|█████     | 251/500 [05:34<1:03:33, 15.32s/it][A
Iteration:  50%|█████     | 252/500 [05:35<45:40, 11.05s/it]  [A
Iteration:  51%|█████     | 253/500 [05:36<33:11,  8.06s/it][A
Iteration:  51%|█████     | 254/500 [05:37<24:29,  5.97s/it][A
Iteration:  51%|█████     | 255/500 [05:38<18:24,  4.51s/it][A
Iteration:  51%|█████     | 256/500 [05:39<14:09,  3.48s/it][A
Iteration:  51%|█████▏    | 257/500 [05:41<11:11,  2.77s/it][A
Iteration:  52%|█████▏    | 258/500 [05:42<09:07,  2.26s/it][A
Iteration:  52%|█████▏    | 259/500 [05:43<07:40,  1.91s/it][A
Iteration:  52%|█████▏    | 260/500 [05:44<06:39,  1.67s/it][A
Iteration:  52%|█████▏    | 261/500 [05:45<05:57,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:46<05:27,  1.38s/it][A
Iteration:  53%|█████▎    | 263/500 [05:47<05:06,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:48<04:51,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  6  7  9 10 11 12 13 14 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.7567
  f1 = 0.7567
  loss = 0.9076



Iteration: 100%|██████████| 500/500 [11:13<00:00,  1.35s/it]
Epoch:   7%|▋         | 1/15 [11:13<2:37:11, 673.71s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<08:52,  1.07s/it][A
Iteration:   0%|          | 2/500 [00:02<08:54,  1.07s/it][A
Iteration:   1%|          | 3/500 [00:03<08:55,  1.08s/it][A
Iteration:   1%|          | 4/500 [00:04<08:55,  1.08s/it][A
Iteration:   1%|          | 5/500 [00:05<08:55,  1.08s/it][A
Iteration:   1%|          | 6/500 [00:06<08:55,  1.08s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:57,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:58,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:56,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:58,  1.10s/it][A
Iteration:   2%|▏         | 11/500 [00:11<08:56,  1.10s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:53,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:51,  1.09s/it][A
Iteration:   3%|▎

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:03,  1.28it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.7957
  f1 = 0.7957
  loss = 0.7476



Iteration:  50%|█████     | 250/500 [05:40<1:28:36, 21.27s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:07, 15.21s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:20, 10.97s/it]  [A
Iteration:  51%|█████     | 253/500 [05:43<32:57,  8.01s/it][A
Iteration:  51%|█████     | 254/500 [05:44<24:18,  5.93s/it][A
Iteration:  51%|█████     | 255/500 [05:45<18:16,  4.48s/it][A
Iteration:  51%|█████     | 256/500 [05:46<14:03,  3.46s/it][A
Iteration:  51%|█████▏    | 257/500 [05:47<11:08,  2.75s/it][A
Iteration:  52%|█████▏    | 258/500 [05:48<09:04,  2.25s/it][A
Iteration:  52%|█████▏    | 259/500 [05:49<07:38,  1.90s/it][A
Iteration:  52%|█████▏    | 260/500 [05:51<06:38,  1.66s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:55,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:25,  1.37s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:04,  1.28s/it][A
Iteration:  53%|█████▎    | 264/500 [05:55<04:49,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8049
  f1 = 0.8049
  loss = 0.7241



Iteration: 100%|██████████| 500/500 [11:19<00:00,  1.36s/it]
Epoch:  13%|█▎        | 2/15 [22:33<2:26:45, 677.31s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<08:56,  1.07s/it][A
Iteration:   0%|          | 2/500 [00:02<08:57,  1.08s/it][A
Iteration:   1%|          | 3/500 [00:03<08:56,  1.08s/it][A
Iteration:   1%|          | 4/500 [00:04<08:56,  1.08s/it][A
Iteration:   1%|          | 5/500 [00:05<08:57,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<08:58,  1.09s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:57,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:57,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:58,  1.10s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:55,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:11<08:53,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:51,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:53,  1.10s/it][A
Iteration:   3%|▎

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.28it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:03,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.28it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:07<00:59,  1.28it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.28it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:56,  1.28it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8178
  f1 = 0.8178
  loss = 0.7213



Iteration:  50%|█████     | 250/500 [05:40<1:29:10, 21.40s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:30, 15.30s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:37, 11.04s/it]  [A
Iteration:  51%|█████     | 253/500 [05:43<33:09,  8.05s/it][A
Iteration:  51%|█████     | 254/500 [05:45<24:27,  5.96s/it][A
Iteration:  51%|█████     | 255/500 [05:46<18:23,  4.50s/it][A
Iteration:  51%|█████     | 256/500 [05:47<14:08,  3.48s/it][A
Iteration:  51%|█████▏    | 257/500 [05:48<11:11,  2.76s/it][A
Iteration:  52%|█████▏    | 258/500 [05:49<09:06,  2.26s/it][A
Iteration:  52%|█████▏    | 259/500 [05:50<07:39,  1.91s/it][A
Iteration:  52%|█████▏    | 260/500 [05:51<06:39,  1.66s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:56,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:26,  1.37s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:05,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:55<04:49,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8314
  f1 = 0.8314
  loss = 0.6387



Iteration: 100%|██████████| 500/500 [11:25<00:00,  1.37s/it]
Epoch:  20%|██        | 3/15 [33:59<2:16:13, 681.15s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:14,  1.11s/it][A
Iteration:   0%|          | 2/500 [00:02<09:06,  1.10s/it][A
Iteration:   1%|          | 3/500 [00:03<09:02,  1.09s/it][A
Iteration:   1%|          | 4/500 [00:04<08:58,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<08:55,  1.08s/it][A
Iteration:   1%|          | 6/500 [00:06<08:53,  1.08s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:53,  1.08s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:52,  1.08s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:52,  1.08s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:53,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:11<08:54,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:54,  1.10s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:53,  1.10s/it][A
Iteration:   3%|▎

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.28it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8145
  f1 = 0.8145
  loss = 0.8320



Iteration:  50%|█████     | 250/500 [05:46<1:36:25, 23.14s/it][A
Iteration:  50%|█████     | 251/500 [05:47<1:08:35, 16.53s/it][A
Iteration:  50%|█████     | 252/500 [05:48<49:09, 11.89s/it]  [A
Iteration:  51%|█████     | 253/500 [05:49<35:35,  8.65s/it][A
Iteration:  51%|█████     | 254/500 [05:50<26:08,  6.37s/it][A
Iteration:  51%|█████     | 255/500 [05:52<19:32,  4.79s/it][A
Iteration:  51%|█████     | 256/500 [05:53<14:56,  3.67s/it][A
Iteration:  51%|█████▏    | 257/500 [05:54<11:43,  2.90s/it][A
Iteration:  52%|█████▏    | 258/500 [05:55<09:29,  2.35s/it][A
Iteration:  52%|█████▏    | 259/500 [05:56<07:55,  1.97s/it][A
Iteration:  52%|█████▏    | 260/500 [05:57<06:49,  1.71s/it][A
Iteration:  52%|█████▏    | 261/500 [05:58<06:03,  1.52s/it][A
Iteration:  52%|█████▏    | 262/500 [05:59<05:30,  1.39s/it][A
Iteration:  53%|█████▎    | 263/500 [06:00<05:07,  1.30s/it][A
Iteration:  53%|█████▎    | 264/500 [06:01<04:51,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8259
  f1 = 0.8259
  loss = 0.8444



Iteration: 100%|██████████| 500/500 [11:26<00:00,  1.37s/it]
Epoch:  27%|██▋       | 4/15 [45:26<2:05:17, 683.43s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:24,  1.13s/it][A
Iteration:   0%|          | 2/500 [00:02<09:13,  1.11s/it][A
Iteration:   1%|          | 3/500 [00:03<09:07,  1.10s/it][A
Iteration:   1%|          | 4/500 [00:04<09:04,  1.10s/it][A
Iteration:   1%|          | 5/500 [00:05<09:01,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<08:59,  1.09s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:58,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:58,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:56,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:55,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:12<08:54,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:53,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:51,  1.09s/it][A
Iteration:   3%|▎

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8237
  f1 = 0.8237
  loss = 0.9348



Iteration:  50%|█████     | 250/500 [05:40<1:29:29, 21.48s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:47, 15.37s/it][A
Iteration:  50%|█████     | 252/500 [05:43<45:49, 11.09s/it]  [A
Iteration:  51%|█████     | 253/500 [05:44<33:17,  8.09s/it][A
Iteration:  51%|█████     | 254/500 [05:45<24:32,  5.98s/it][A
Iteration:  51%|█████     | 255/500 [05:46<18:26,  4.52s/it][A
Iteration:  51%|█████     | 256/500 [05:47<14:10,  3.49s/it][A
Iteration:  51%|█████▏    | 257/500 [05:48<11:12,  2.77s/it][A
Iteration:  52%|█████▏    | 258/500 [05:49<09:08,  2.27s/it][A
Iteration:  52%|█████▏    | 259/500 [05:50<07:41,  1.91s/it][A
Iteration:  52%|█████▏    | 260/500 [05:51<06:39,  1.67s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:57,  1.50s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:27,  1.37s/it][A
Iteration:  53%|█████▎    | 263/500 [05:55<05:05,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:56<04:49,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.28it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:59,  1.28it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.28it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:56,  1.28it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:55,  1.28it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8329
  f1 = 0.8329
  loss = 0.8971



Iteration: 100%|██████████| 500/500 [11:25<00:00,  1.37s/it]
Epoch:  33%|███▎      | 5/15 [56:51<1:54:00, 684.09s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:13,  1.11s/it][A
Iteration:   0%|          | 2/500 [00:02<09:01,  1.09s/it][A
Iteration:   1%|          | 3/500 [00:03<08:57,  1.08s/it][A
Iteration:   1%|          | 4/500 [00:04<08:55,  1.08s/it][A
Iteration:   1%|          | 5/500 [00:05<08:54,  1.08s/it][A
Iteration:   1%|          | 6/500 [00:06<08:55,  1.08s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:54,  1.08s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:54,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:55,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:54,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:11<08:53,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:56,  1.10s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:53,  1.09s/it][A
Iteration:   3%|▎

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8377
  f1 = 0.8377
  loss = 1.0112



Iteration:  50%|█████     | 250/500 [05:40<1:29:09, 21.40s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:34, 15.32s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:40, 11.05s/it]  [A
Iteration:  51%|█████     | 253/500 [05:44<33:12,  8.06s/it][A
Iteration:  51%|█████     | 254/500 [05:45<24:29,  5.97s/it][A
Iteration:  51%|█████     | 255/500 [05:46<18:24,  4.51s/it][A
Iteration:  51%|█████     | 256/500 [05:47<14:10,  3.48s/it][A
Iteration:  51%|█████▏    | 257/500 [05:48<11:12,  2.77s/it][A
Iteration:  52%|█████▏    | 258/500 [05:49<09:07,  2.26s/it][A
Iteration:  52%|█████▏    | 259/500 [05:50<07:40,  1.91s/it][A
Iteration:  52%|█████▏    | 260/500 [05:51<06:40,  1.67s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:57,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:28,  1.38s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:07,  1.30s/it][A
Iteration:  53%|█████▎    | 264/500 [05:56<04:51,  1.24s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.28it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8425
  f1 = 0.8425
  loss = 1.0246



Iteration: 100%|██████████| 500/500 [11:21<00:00,  1.36s/it]
Epoch:  40%|████      | 6/15 [1:08:12<1:42:28, 683.15s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:18,  1.12s/it][A
Iteration:   0%|          | 2/500 [00:02<09:09,  1.10s/it][A
Iteration:   1%|          | 3/500 [00:03<09:05,  1.10s/it][A
Iteration:   1%|          | 4/500 [00:04<09:02,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<09:00,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<08:59,  1.09s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:57,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:56,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:56,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:55,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:12<08:55,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:52,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:57,  1.10s/it][A
Iteration:   3%

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.30it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8336
  f1 = 0.8336
  loss = 1.1631



Iteration:  50%|█████     | 250/500 [05:40<1:28:53, 21.33s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:21, 15.27s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:31, 11.01s/it]  [A
Iteration:  51%|█████     | 253/500 [05:43<33:05,  8.04s/it][A
Iteration:  51%|█████     | 254/500 [05:44<24:24,  5.95s/it][A
Iteration:  51%|█████     | 255/500 [05:45<18:21,  4.50s/it][A
Iteration:  51%|█████     | 256/500 [05:46<14:07,  3.48s/it][A
Iteration:  51%|█████▏    | 257/500 [05:48<11:10,  2.76s/it][A
Iteration:  52%|█████▏    | 258/500 [05:49<09:06,  2.26s/it][A
Iteration:  52%|█████▏    | 259/500 [05:50<07:40,  1.91s/it][A
Iteration:  52%|█████▏    | 260/500 [05:51<06:38,  1.66s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:56,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:25,  1.37s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:04,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:55<04:49,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:02,  1.30it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<00:59,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:09<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8336
  f1 = 0.8336
  loss = 1.1579



Iteration: 100%|██████████| 500/500 [11:28<00:00,  1.38s/it]
Epoch:  47%|████▋     | 7/15 [1:19:41<1:31:18, 684.84s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:24,  1.13s/it][A
Iteration:   0%|          | 2/500 [00:02<09:07,  1.10s/it][A
Iteration:   1%|          | 3/500 [00:03<09:02,  1.09s/it][A
Iteration:   1%|          | 4/500 [00:04<08:58,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<08:55,  1.08s/it][A
Iteration:   1%|          | 6/500 [00:06<08:53,  1.08s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:53,  1.08s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:52,  1.08s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:53,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:52,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:11<08:52,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:51,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:50,  1.09s/it][A
Iteration:   3%

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:03,  1.28it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:55,  1.28it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8381
  f1 = 0.8381
  loss = 1.2477



Iteration:  50%|█████     | 250/500 [05:44<1:33:17, 22.39s/it][A
Iteration:  50%|█████     | 251/500 [05:45<1:06:25, 16.01s/it][A
Iteration:  50%|█████     | 252/500 [05:46<47:38, 11.53s/it]  [A
Iteration:  51%|█████     | 253/500 [05:47<34:33,  8.40s/it][A
Iteration:  51%|█████     | 254/500 [05:48<25:25,  6.20s/it][A
Iteration:  51%|█████     | 255/500 [05:49<19:02,  4.66s/it][A
Iteration:  51%|█████     | 256/500 [05:50<14:36,  3.59s/it][A
Iteration:  51%|█████▏    | 257/500 [05:51<11:30,  2.84s/it][A
Iteration:  52%|█████▏    | 258/500 [05:52<09:20,  2.31s/it][A
Iteration:  52%|█████▏    | 259/500 [05:53<07:49,  1.95s/it][A
Iteration:  52%|█████▏    | 260/500 [05:54<06:45,  1.69s/it][A
Iteration:  52%|█████▏    | 261/500 [05:56<05:59,  1.51s/it][A
Iteration:  52%|█████▏    | 262/500 [05:57<05:28,  1.38s/it][A
Iteration:  53%|█████▎    | 263/500 [05:58<05:06,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:59<04:51,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:02,  1.31it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<00:59,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:09<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8241
  f1 = 0.8241
  loss = 1.3235



Iteration: 100%|██████████| 500/500 [11:24<00:00,  1.37s/it]
Epoch:  53%|█████▎    | 8/15 [1:31:05<1:19:52, 684.63s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:25,  1.13s/it][A
Iteration:   0%|          | 2/500 [00:02<09:10,  1.11s/it][A
Iteration:   1%|          | 3/500 [00:03<09:04,  1.10s/it][A
Iteration:   1%|          | 4/500 [00:04<09:01,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<09:01,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<09:01,  1.10s/it][A
Iteration:   1%|▏         | 7/500 [00:07<09:04,  1.10s/it][A
Iteration:   2%|▏         | 8/500 [00:08<09:00,  1.10s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:57,  1.10s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:54,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:12<08:52,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:51,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:49,  1.09s/it][A
Iteration:   3%

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.28it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8395
  f1 = 0.8395
  loss = 1.3274



Iteration:  50%|█████     | 250/500 [05:39<1:28:41, 21.29s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:10, 15.22s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:23, 10.98s/it]  [A
Iteration:  51%|█████     | 253/500 [05:43<32:58,  8.01s/it][A
Iteration:  51%|█████     | 254/500 [05:44<24:19,  5.93s/it][A
Iteration:  51%|█████     | 255/500 [05:45<18:17,  4.48s/it][A
Iteration:  51%|█████     | 256/500 [05:46<14:04,  3.46s/it][A
Iteration:  51%|█████▏    | 257/500 [05:47<11:07,  2.75s/it][A
Iteration:  52%|█████▏    | 258/500 [05:48<09:04,  2.25s/it][A
Iteration:  52%|█████▏    | 259/500 [05:49<07:39,  1.91s/it][A
Iteration:  52%|█████▏    | 260/500 [05:50<06:39,  1.66s/it][A
Iteration:  52%|█████▏    | 261/500 [05:51<05:56,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:26,  1.37s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:05,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:55<04:50,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:02,  1.28it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8307
  f1 = 0.8307
  loss = 1.3956



Iteration: 100%|██████████| 500/500 [11:20<00:00,  1.36s/it]
Epoch:  60%|██████    | 9/15 [1:42:26<1:08:20, 683.41s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:16,  1.12s/it][A
Iteration:   0%|          | 2/500 [00:02<09:07,  1.10s/it][A
Iteration:   1%|          | 3/500 [00:03<09:02,  1.09s/it][A
Iteration:   1%|          | 4/500 [00:04<09:00,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<08:58,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<08:58,  1.09s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:56,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:55,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:55,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:54,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:12<08:52,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:51,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:51,  1.09s/it][A
Iteration:   3%

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.30it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8370
  f1 = 0.8370
  loss = 1.3749



Iteration:  50%|█████     | 250/500 [05:40<1:29:00, 21.36s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:25, 15.28s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:33, 11.02s/it]  [A
Iteration:  51%|█████     | 253/500 [05:43<33:06,  8.04s/it][A
Iteration:  51%|█████     | 254/500 [05:44<24:24,  5.95s/it][A
Iteration:  51%|█████     | 255/500 [05:45<18:20,  4.49s/it][A
Iteration:  51%|█████     | 256/500 [05:46<14:07,  3.47s/it][A
Iteration:  51%|█████▏    | 257/500 [05:47<11:10,  2.76s/it][A
Iteration:  52%|█████▏    | 258/500 [05:48<09:06,  2.26s/it][A
Iteration:  52%|█████▏    | 259/500 [05:49<07:39,  1.91s/it][A
Iteration:  52%|█████▏    | 260/500 [05:50<06:38,  1.66s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:56,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:27,  1.37s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:05,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:55<04:49,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.28it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8384
  f1 = 0.8384
  loss = 1.3907



Iteration: 100%|██████████| 500/500 [11:20<00:00,  1.36s/it]
Epoch:  67%|██████▋   | 10/15 [1:53:46<56:52, 682.51s/it] 
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<08:50,  1.06s/it][A
Iteration:   0%|          | 2/500 [00:02<08:54,  1.07s/it][A
Iteration:   1%|          | 3/500 [00:03<08:56,  1.08s/it][A
Iteration:   1%|          | 4/500 [00:04<08:59,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<08:57,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<08:56,  1.09s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:55,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:55,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:54,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:53,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:11<08:52,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:51,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:49,  1.09s/it][A
Iteration:   3%

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:02,  1.30it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8373
  f1 = 0.8373
  loss = 1.3864



Iteration:  50%|█████     | 250/500 [05:40<1:28:50, 21.32s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:16, 15.25s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:27, 11.00s/it]  [A
Iteration:  51%|█████     | 253/500 [05:43<33:01,  8.02s/it][A
Iteration:  51%|█████     | 254/500 [05:44<24:21,  5.94s/it][A
Iteration:  51%|█████     | 255/500 [05:45<18:18,  4.48s/it][A
Iteration:  51%|█████     | 256/500 [05:46<14:04,  3.46s/it][A
Iteration:  51%|█████▏    | 257/500 [05:47<11:08,  2.75s/it][A
Iteration:  52%|█████▏    | 258/500 [05:48<09:04,  2.25s/it][A
Iteration:  52%|█████▏    | 259/500 [05:50<07:38,  1.90s/it][A
Iteration:  52%|█████▏    | 260/500 [05:51<06:38,  1.66s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:56,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:26,  1.37s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:04,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:55<04:49,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.30it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8410
  f1 = 0.8410
  loss = 1.4240



Iteration: 100%|██████████| 500/500 [11:20<00:00,  1.36s/it]
Epoch:  73%|███████▎  | 11/15 [2:05:07<45:28, 682.01s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:10,  1.10s/it][A
Iteration:   0%|          | 2/500 [00:02<09:03,  1.09s/it][A
Iteration:   1%|          | 3/500 [00:03<09:01,  1.09s/it][A
Iteration:   1%|          | 4/500 [00:04<08:59,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<08:58,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<08:57,  1.09s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:57,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:59,  1.10s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:59,  1.10s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:55,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:12<08:53,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:50,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:50,  1.09s/it][A
Iteration:   3%|

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8388
  f1 = 0.8388
  loss = 1.4559



Iteration:  50%|█████     | 250/500 [05:40<1:29:32, 21.49s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:50, 15.38s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:52, 11.10s/it]  [A
Iteration:  51%|█████     | 253/500 [05:43<33:19,  8.10s/it][A
Iteration:  51%|█████     | 254/500 [05:45<24:34,  5.99s/it][A
Iteration:  51%|█████     | 255/500 [05:46<18:27,  4.52s/it][A
Iteration:  51%|█████     | 256/500 [05:47<14:12,  3.50s/it][A
Iteration:  51%|█████▏    | 257/500 [05:48<11:15,  2.78s/it][A
Iteration:  52%|█████▏    | 258/500 [05:49<09:09,  2.27s/it][A
Iteration:  52%|█████▏    | 259/500 [05:50<07:42,  1.92s/it][A
Iteration:  52%|█████▏    | 260/500 [05:51<06:41,  1.67s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:58,  1.50s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:28,  1.38s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:05,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:55<04:49,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<00:59,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8403
  f1 = 0.8403
  loss = 1.4568



Iteration: 100%|██████████| 500/500 [11:24<00:00,  1.37s/it]
Epoch:  80%|████████  | 12/15 [2:16:31<34:08, 682.72s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:07,  1.10s/it][A
Iteration:   0%|          | 2/500 [00:02<08:59,  1.08s/it][A
Iteration:   1%|          | 3/500 [00:03<08:57,  1.08s/it][A
Iteration:   1%|          | 4/500 [00:04<08:55,  1.08s/it][A
Iteration:   1%|          | 5/500 [00:05<08:54,  1.08s/it][A
Iteration:   1%|          | 6/500 [00:06<08:53,  1.08s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:52,  1.08s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:52,  1.08s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:51,  1.08s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:52,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:11<08:50,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:50,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:49,  1.09s/it][A
Iteration:   3%|

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8392
  f1 = 0.8392
  loss = 1.4735



Iteration:  50%|█████     | 250/500 [05:40<1:28:52, 21.33s/it][A
Iteration:  50%|█████     | 251/500 [05:41<1:03:22, 15.27s/it][A
Iteration:  50%|█████     | 252/500 [05:42<45:32, 11.02s/it]  [A
Iteration:  51%|█████     | 253/500 [05:43<33:05,  8.04s/it][A
Iteration:  51%|█████     | 254/500 [05:44<24:24,  5.95s/it][A
Iteration:  51%|█████     | 255/500 [05:45<18:20,  4.49s/it][A
Iteration:  51%|█████     | 256/500 [05:46<14:07,  3.47s/it][A
Iteration:  51%|█████▏    | 257/500 [05:47<11:10,  2.76s/it][A
Iteration:  52%|█████▏    | 258/500 [05:49<09:06,  2.26s/it][A
Iteration:  52%|█████▏    | 259/500 [05:50<07:39,  1.91s/it][A
Iteration:  52%|█████▏    | 260/500 [05:51<06:38,  1.66s/it][A
Iteration:  52%|█████▏    | 261/500 [05:52<05:56,  1.49s/it][A
Iteration:  52%|█████▏    | 262/500 [05:53<05:26,  1.37s/it][A
Iteration:  53%|█████▎    | 263/500 [05:54<05:05,  1.29s/it][A
Iteration:  53%|█████▎    | 264/500 [05:55<04:50,  1.23s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.30it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8443
  f1 = 0.8443
  loss = 1.4560



Iteration: 100%|██████████| 500/500 [11:20<00:00,  1.36s/it]
Epoch:  87%|████████▋ | 13/15 [2:27:52<22:44, 682.13s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:09,  1.10s/it][A
Iteration:   0%|          | 2/500 [00:02<09:04,  1.09s/it][A
Iteration:   1%|          | 3/500 [00:03<09:01,  1.09s/it][A
Iteration:   1%|          | 4/500 [00:04<09:01,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<09:00,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<08:59,  1.09s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:59,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:57,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:55,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:54,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:12<08:53,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:51,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:50,  1.09s/it][A
Iteration:   3%|

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:05,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.28it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:56,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.30it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8436
  f1 = 0.8436
  loss = 1.4643



Iteration:  50%|█████     | 250/500 [05:44<1:34:43, 22.73s/it][A
Iteration:  50%|█████     | 251/500 [05:45<1:07:25, 16.25s/it][A
Iteration:  50%|█████     | 252/500 [05:46<48:20, 11.70s/it]  [A
Iteration:  51%|█████     | 253/500 [05:47<35:01,  8.51s/it][A
Iteration:  51%|█████     | 254/500 [05:49<25:44,  6.28s/it][A
Iteration:  51%|█████     | 255/500 [05:50<19:16,  4.72s/it][A
Iteration:  51%|█████     | 256/500 [05:51<14:45,  3.63s/it][A
Iteration:  51%|█████▏    | 257/500 [05:52<11:36,  2.87s/it][A
Iteration:  52%|█████▏    | 258/500 [05:53<09:24,  2.33s/it][A
Iteration:  52%|█████▏    | 259/500 [05:54<07:52,  1.96s/it][A
Iteration:  52%|█████▏    | 260/500 [05:55<06:48,  1.70s/it][A
Iteration:  52%|█████▏    | 261/500 [05:56<06:02,  1.52s/it][A
Iteration:  52%|█████▏    | 262/500 [05:57<05:30,  1.39s/it][A
Iteration:  53%|█████▎    | 263/500 [05:58<05:07,  1.30s/it][A
Iteration:  53%|█████▎    | 264/500 [05:59<04:52,  1.24s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:04,  1.29it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:58,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8436
  f1 = 0.8436
  loss = 1.4666



Iteration: 100%|██████████| 500/500 [11:25<00:00,  1.37s/it]
Epoch:  93%|█████████▎| 14/15 [2:39:18<11:23, 683.20s/it]
Iteration:   0%|          | 0/500 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/500 [00:01<09:17,  1.12s/it][A
Iteration:   0%|          | 2/500 [00:02<09:08,  1.10s/it][A
Iteration:   1%|          | 3/500 [00:03<09:03,  1.09s/it][A
Iteration:   1%|          | 4/500 [00:04<09:00,  1.09s/it][A
Iteration:   1%|          | 5/500 [00:05<08:58,  1.09s/it][A
Iteration:   1%|          | 6/500 [00:06<08:58,  1.09s/it][A
Iteration:   1%|▏         | 7/500 [00:07<08:56,  1.09s/it][A
Iteration:   2%|▏         | 8/500 [00:08<08:55,  1.09s/it][A
Iteration:   2%|▏         | 9/500 [00:09<08:54,  1.09s/it][A
Iteration:   2%|▏         | 10/500 [00:10<08:53,  1.09s/it][A
Iteration:   2%|▏         | 11/500 [00:11<08:51,  1.09s/it][A
Iteration:   2%|▏         | 12/500 [00:13<08:51,  1.09s/it][A
Iteration:   3%|▎         | 13/500 [00:14<08:53,  1.10s/it][A
Iteration:   3%|

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.30it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:03,  1.29it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.29it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.29it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:01,  1.29it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<01:00,  1.29it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.29it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.30it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.30it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.29it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.30it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8425
  f1 = 0.8425
  loss = 1.4666



Iteration:  50%|█████     | 250/500 [05:44<1:33:43, 22.49s/it][A
Iteration:  50%|█████     | 251/500 [05:45<1:06:43, 16.08s/it][A
Iteration:  50%|█████     | 252/500 [05:46<47:51, 11.58s/it]  [A
Iteration:  51%|█████     | 253/500 [05:47<34:42,  8.43s/it][A
Iteration:  51%|█████     | 254/500 [05:48<25:31,  6.22s/it][A
Iteration:  51%|█████     | 255/500 [05:49<19:06,  4.68s/it][A
Iteration:  51%|█████     | 256/500 [05:50<14:38,  3.60s/it][A
Iteration:  51%|█████▏    | 257/500 [05:51<11:31,  2.84s/it][A
Iteration:  52%|█████▏    | 258/500 [05:52<09:20,  2.32s/it][A
Iteration:  52%|█████▏    | 259/500 [05:53<07:49,  1.95s/it][A
Iteration:  52%|█████▏    | 260/500 [05:54<06:45,  1.69s/it][A
Iteration:  52%|█████▏    | 261/500 [05:55<06:01,  1.51s/it][A
Iteration:  52%|█████▏    | 262/500 [05:57<05:30,  1.39s/it][A
Iteration:  53%|█████▎    | 263/500 [05:58<05:07,  1.30s/it][A
Iteration:  53%|█████▎    | 264/500 [05:59<04:51,  1.24s/it][A
Iteration:  53%|█████▎    | 265/5

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32




Evaluating:   0%|          | 0/85 [00:00<?, ?it/s][A[A

Evaluating:   1%|          | 1/85 [00:00<01:04,  1.29it/s][A[A

Evaluating:   2%|▏         | 2/85 [00:01<01:03,  1.30it/s][A[A

Evaluating:   4%|▎         | 3/85 [00:02<01:02,  1.30it/s][A[A

Evaluating:   5%|▍         | 4/85 [00:03<01:02,  1.30it/s][A[A

Evaluating:   6%|▌         | 5/85 [00:03<01:01,  1.30it/s][A[A

Evaluating:   7%|▋         | 6/85 [00:04<01:00,  1.30it/s][A[A

Evaluating:   8%|▊         | 7/85 [00:05<00:59,  1.30it/s][A[A

Evaluating:   9%|▉         | 8/85 [00:06<00:59,  1.30it/s][A[A

Evaluating:  11%|█         | 9/85 [00:06<00:58,  1.29it/s][A[A

Evaluating:  12%|█▏        | 10/85 [00:07<00:57,  1.29it/s][A[A

Evaluating:  13%|█▎        | 11/85 [00:08<00:57,  1.30it/s][A[A

Evaluating:  14%|█▍        | 12/85 [00:09<00:56,  1.29it/s][A[A

Evaluating:  15%|█▌        | 13/85 [00:10<00:55,  1.29it/s][A[A

Evaluating:  16%|█▋        | 14/85 [00:10<00:54,  1.30it/s][A[A

Evaluating:

preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8436
  f1 = 0.8436
  loss = 1.4575



Iteration: 100%|██████████| 500/500 [11:24<00:00,  1.37s/it]
Epoch: 100%|██████████| 15/15 [2:50:42<00:00, 682.83s/it]


Execution time: 10249.215040445328 seconds


## Testing

In [None]:
args.do_eval = True
if args.do_eval:
    trainer.load_model()
    res, test_loss, preds, labels  = trainer.evaluate("test")

***** Running evaluation on test dataset *****
  Num examples = %d 2717
  Batch size = %d 32


Evaluating: 100%|██████████| 85/85 [01:02<00:00,  1.36it/s]


preds len: [ 0  1  2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18]
out_label_ids len: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
Inside compute metrics: len(preds): 2717
Inside compute metrics: len(labels): 2717
***** Eval results *****
  acc = 0.8436
  f1 = 0.8436
  loss = 1.4575


Displaying scores by relation.

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
predict =  preds
true =  labels



positive_labels = [i for i in range(len(rel2id)) if i != 0]
positive_names = [list(rel2id.keys())[list(rel2id.values()).index(i)] for i in range(len(rel2id)) if i != 0]
print(classification_report(
                y_true=true,
                y_pred=predict,
                labels=positive_labels,
                target_names=positive_names
            ))


                           precision    recall  f1-score   support

      Cause-Effect(e1,e2)       0.87      0.97      0.91       210
      Cause-Effect(e2,e1)       0.80      0.96      0.88        51
   Component-Whole(e1,e2)       0.83      0.87      0.85       108
   Component-Whole(e2,e1)       0.85      0.91      0.88       123
 Content-Container(e1,e2)       0.65      0.77      0.71        22
 Content-Container(e2,e1)       0.83      0.83      0.83       134
Entity-Destination(e1,e2)       0.92      0.96      0.94       291
Entity-Destination(e2,e1)       0.00      0.00      0.00         1
     Entity-Origin(e1,e2)       0.91      0.95      0.93       134
     Entity-Origin(e2,e1)       0.90      0.93      0.91       194
 Instrument-Agency(e1,e2)       0.88      0.87      0.88       162
 Instrument-Agency(e2,e1)       0.79      0.83      0.81       150
 Member-Collection(e1,e2)       0.85      0.87      0.86       211
 Member-Collection(e2,e1)       0.89      0.87      0.88     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
