In [1]:
TRAIN_PATH = "/home/ubuntu/data/iptc/iptc_nathan_train.json"
DEV_PATH = "/home/ubuntu/data/iptc/iptc_nathan_dev.json"
TEST_PATH = "/home/ubuntu/data/iptc/iptc_nathan_test.json"

In [2]:
import json
from torchtext.data import Example
from torchtext.data import TabularDataset, Field, BucketIterator

text = Field(sequential=False)  #sequential = False because we do not want to tokenize yet.
labels = Field(sequential=False, use_vocab=False)

def read_file(f):
    with open(f) as i:
        data = json.load(i)
        
    examples = []
    for item in data:
        e = Example.fromdict({"labels": item["labels"], "text": item["title"] + " " + item["text"]}, 
                          fields={'labels': ('labels', labels), 'text': ('text', text)})
        examples.append(e)
    
    return examples
    
train_data = read_file(TRAIN_PATH)
dev_data = read_file(DEV_PATH)
test_data = read_file(TEST_PATH)
        

In [3]:
!pip install pytorch-pretrained-bert

[31mfastai 1.0.33 has requirement spacy==2.0.16, but you'll have spacy 2.0.18 which is incompatible.[0m
[31mfastai 1.0.33 has requirement thinc==6.12.0, but you'll have thinc 6.12.1 which is incompatible.[0m
[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [4]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
label_set = set()
for example in train_data:
    label_set.update(example.labels)

target_names = list(label_set)
label2idx = {label: idx for idx, label in enumerate(target_names)}
len(label2idx)

17

Available models are `bert-base-uncased`, `bert-large-uncased`, `bert-base-cased`, `bert-base-multilingual-uncased`, `bert-base-multilingual-cased`  and `bert-base-chinese`.

In [6]:
from pytorch_pretrained_bert.tokenization import BertTokenizer

BERT_MODEL = "bert-base-multilingual-uncased"

tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [7]:
from pytorch_pretrained_bert.modeling import PreTrainedBertModel, BertModel
import torch.nn as nn

class BertForMultiLabelSequenceClassification(PreTrainedBertModel):
    """BERT model for classification.
    This module is composed of the BERT model with a linear layer on top of
    the pooled output.
    Params:
        `config`: a BertConfig class instance with the configuration to build a new model.
        `num_labels`: the number of classes for the classifier. Default = 2.
    Inputs:
        `input_ids`: a torch.LongTensor of shape [batch_size, sequence_length]
            with the word token indices in the vocabulary(see the tokens preprocessing logic in the scripts
            `extract_features.py`, `run_classifier.py` and `run_squad.py`)
        `token_type_ids`: an optional torch.LongTensor of shape [batch_size, sequence_length] with the token
            types indices selected in [0, 1]. Type 0 corresponds to a `sentence A` and type 1 corresponds to
            a `sentence B` token (see BERT paper for more details).
        `attention_mask`: an optional torch.LongTensor of shape [batch_size, sequence_length] with indices
            selected in [0, 1]. It's a mask to be used if the input sequence length is smaller than the max
            input sequence length in the current batch. It's the mask that we typically use for attention when
            a batch has varying length sentences.
        `labels`: labels for the classification output: torch.LongTensor of shape [batch_size]
            with indices selected in [0, ..., num_labels].
    Outputs:
        if `labels` is not `None`:
            Outputs the CrossEntropy classification loss of the output with the labels.
        if `labels` is `None`:
            Outputs the classification logits of shape [batch_size, num_labels].
    Example usage:
    ```python
    # Already been converted into WordPiece token ids
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
        num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)
    num_labels = 2
    model = BertForSequenceClassification(config, num_labels)
    logits = model(input_ids, token_type_ids, input_mask)
    ```
    """
    def __init__(self, config, num_labels=2):
        super(BertForMultiLabelSequenceClassification, self).__init__(config)
        self.num_labels = num_labels
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_labels)
        self.apply(self.init_bert_weights)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
        _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        if labels is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            loss = loss_fct(logits, labels)
            return loss
        else:
            return logits

In [8]:
model = BertForMultiLabelSequenceClassification.from_pretrained(BERT_MODEL, num_labels = len(label2idx))
model.to(device)

BertForMultiLabelSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1)
            )
          )
          (intermediate): 

In [9]:
from pytorch_pretrained_bert.optimization import BertAdam

TRAIN_BATCH_SIZE = 16
GRADIENT_ACCUMULATION_STEPS = 1
NUM_TRAIN_EPOCHS = 100
LEARNING_RATE = 5e-5
WARMUP_PROPORTION = 0.1

num_train_steps = int(len(train_data) / TRAIN_BATCH_SIZE / GRADIENT_ACCUMULATION_STEPS * NUM_TRAIN_EPOCHS)

param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
t_total = num_train_steps

optimizer = BertAdam(optimizer_grouped_parameters,
                     LEARNING_RATE,
                     warmup=WARMUP_PROPORTION,
                     t_total=t_total)

In [10]:
import logging
import numpy as np

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.INFO)
logger = logging.getLogger(__name__)


class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, input_mask, segment_ids, label_id):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.label_id = label_id
        

def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
    """Loads a data file into a list of `InputBatch`s."""

    label_map = {label : i for i, label in enumerate(label_list)}
    
    features = []
    for (ex_index, example) in enumerate(examples):
        tokens = tokenizer.tokenize(example.text)

        if len(tokens) > max_seq_length - 2:
            tokens = tokens[:(max_seq_length - 2)]
            
        tokens = ["[CLS]"] + tokens + ["[SEP]"]
        segment_ids = [0] * len(tokens)
            
        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        
        # The mask has 1 for real tokens and 0 for padding tokens. Only real
        # tokens are attended to.
        input_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        label_id = [label_map[l] for l in example.labels]
        label_vector = np.zeros(len(label_map))
        for i in label_id:
            label_vector[i] = 1
        if ex_index == 0:
            logger.info("*** Example ***")
            logger.info("tokens: %s" % " ".join(
                    [str(x) for x in tokens]))
            logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
            logger.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
            logger.info(
                    "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
            logger.info("label:" + str(example.labels) + " id: " + str(label_vector))

        features.append(
                InputFeatures(input_ids=input_ids,
                              input_mask=input_mask,
                              segment_ids=segment_ids,
                              label_id=label_vector))
    return features
            

In [15]:
from sklearn.metrics import classification_report, precision_recall_fscore_support
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

def warmup_linear(x, warmup=0.002):
    if x < warmup:
        return x/warmup
    return 1.0 - x


def get_data_loader(examples, max_seq_length, batch_size): 

    features = convert_examples_to_features(
        examples, target_names, max_seq_length, tokenizer)
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float)
    data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
    # Run prediction for full data
    sampler = SequentialSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size)
    return dataloader


def evaluate(model, eval_dataloader, full_report=False, target_names=[]):

    eval_loss = 0
    nb_eval_steps = 0
    all_predictions, correct_labels = [], []

    for step, batch in enumerate(tqdm(eval_dataloader, desc="Evaluation iteration")):
        batch = tuple(t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids = batch

        with torch.no_grad():
            tmp_eval_loss = model(input_ids, segment_ids, input_mask, label_ids)
            logits = model(input_ids, segment_ids, input_mask)

        label_ids = label_ids.to('cpu').numpy()

        sigmoid = nn.Sigmoid()
        probs = sigmoid(logits)

        predicted_labels = probs.detach().cpu().numpy() > 0.5
        all_predictions += [list(p) for p in predicted_labels]
        correct_labels += [list(ids) for ids in label_ids]
        
        eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        

    eval_loss = eval_loss / nb_eval_steps
    
    correct_labels = np.array(correct_labels)
    all_predictions = np.array(all_predictions)

    if full_report:
        print(classification_report(correct_labels, all_predictions, target_names=target_names))
    else:
        print(precision_recall_fscore_support(correct_labels, all_predictions, average="micro"))
        
    return eval_loss

In [12]:
import torch
import os
from tqdm import trange
from tqdm import tqdm_notebook as tqdm

OUTPUT_DIR = "/tmp/"
MAX_SEQ_LENGTH = 100

train_sampler = RandomSampler(train_data)
train_dataloader = get_data_loader(train_data, MAX_SEQ_LENGTH, TRAIN_BATCH_SIZE)
dev_dataloader = get_data_loader(dev_data, MAX_SEQ_LENGTH, TRAIN_BATCH_SIZE)

global_step = 0
model.train()
loss_history = []
patience = 2
for _ in trange(int(NUM_TRAIN_EPOCHS), desc="Epoch"):
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(tqdm(train_dataloader, desc="Training iteration")):
        batch = tuple(t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids = batch
        loss = model(input_ids, segment_ids, input_mask, label_ids)

        if GRADIENT_ACCUMULATION_STEPS > 1:
            loss = loss / GRADIENT_ACCUMULATION_STEPS

        loss.backward()

        tr_loss += loss.item()
        nb_tr_examples += input_ids.size(0)
        nb_tr_steps += 1
        if (step + 1) % GRADIENT_ACCUMULATION_STEPS == 0:
            # modify learning rate with special warm up BERT uses
            lr_this_step = LEARNING_RATE * warmup_linear(global_step/t_total, WARMUP_PROPORTION)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_this_step
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1

    eval_loss = evaluate(model, dev_dataloader)
    
    print("Loss history:", loss_history)
    print("Dev loss:", eval_loss)
    
    if len(loss_history) == 0 or eval_loss < min(loss_history):
        # Save a trained model
        model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
        output_model_file = os.path.join(OUTPUT_DIR, "pytorch_model.bin")
        torch.save(model_to_save.state_dict(), output_model_file)
    
    if len(loss_history) > 0 and eval_loss > max(loss_history[-patience:]): 
        print("No improvement on development set. Finish training.")
        break
        
    
    loss_history.append(eval_loss)

01/09/2019 09:55:18 - INFO - __main__ -   *** Example ***
01/09/2019 09:55:18 - INFO - __main__ -   tokens: [CLS] time roe ##pt vrouwen achter # met ##oo - beweging uit tot ' persoon van het jaar ' de internationale # met ##oo - beweging , waarbij vrouwen get ##ui ##gen over seks ##ueel mis ##bru ##ik en ong ##ep ##ast seks ##ueel ge ##drag , is door time magazine uit ##ger ##oe ##pen tot ' person of the year ' . de amerikaanse president donald trump staat op twee , gevolg ##d door de chinese president xi jin ##ping . " dit is de snel ##st ##gro ##eien ##de sociale beweging die we de laatste decenni ##a [SEP]
01/09/2019 09:55:18 - INFO - __main__ -   input_ids: 101 10573 91254 15903 40405 28361 108 10456 24227 118 56879 10611 10712 112 55416 10147 10184 12882 112 10102 14717 108 10456 24227 118 56879 117 23664 40405 13168 12220 11381 10323 26048 78193 12751 39678 11017 10109 12100 17768 17654 26048 78193 25463 32234 117 10127 10567 10573 12765 10611 11696 25587 12582 10712 112 14150 10

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.11333914559721012, 0.287292817679558, 0.1625507971241013, None)
Loss history: []
Dev loss: 0.6759394501882886


Epoch:   1%|          | 1/100 [01:02<1:42:39, 62.22s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.0, 0.0, 0.0, None)
Loss history: [0.6759394501882886]
Dev loss: 0.4195654704457238


  'precision', 'predicted', average, warn_for)
Epoch:   2%|▏         | 2/100 [02:09<1:44:03, 63.71s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.0, 0.0, 0.0, None)
Loss history: [0.6759394501882886, 0.4195654704457238]
Dev loss: 0.3296236906732832


Epoch:   3%|▎         | 3/100 [03:16<1:44:46, 64.81s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.9902912621359223, 0.056353591160221, 0.10663878724516467, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832]
Dev loss: 0.28321199402922675


Epoch:   4%|▍         | 4/100 [04:24<1:44:56, 65.59s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.812807881773399, 0.27348066298342544, 0.4092600248036379, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675]
Dev loss: 0.24442881156527807


Epoch:   5%|▌         | 5/100 [05:31<1:44:39, 66.10s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.7648202137998056, 0.43480662983425417, 0.554420570623459, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675, 0.24442881156527807]
Dev loss: 0.22472536043515282


Epoch:   6%|▌         | 6/100 [06:38<1:44:07, 66.46s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.8037775445960126, 0.42320441988950275, 0.5544697792254796, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675, 0.24442881156527807, 0.22472536043515282]
Dev loss: 0.2094918948317331


Epoch:   7%|▋         | 7/100 [07:46<1:43:24, 66.71s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.7681692732290708, 0.4613259668508287, 0.576458405246807, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675, 0.24442881156527807, 0.22472536043515282, 0.2094918948317331]
Dev loss: 0.2016692225422178


Epoch:   8%|▊         | 8/100 [08:53<1:42:34, 66.90s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.7864963503649635, 0.47624309392265196, 0.5932553337921541, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675, 0.24442881156527807, 0.22472536043515282, 0.2094918948317331, 0.2016692225422178]
Dev loss: 0.19578908088188324


Epoch:   9%|▉         | 9/100 [10:00<1:41:40, 67.04s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.7838283828382838, 0.5248618784530387, 0.628722700198544, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675, 0.24442881156527807, 0.22472536043515282, 0.2094918948317331, 0.2016692225422178, 0.19578908088188324]
Dev loss: 0.1865974378491205


Epoch:  10%|█         | 10/100 [11:08<1:40:42, 67.14s/it]

HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…

Epoch:  11%|█         | 11/100 [12:09<1:37:08, 65.49s/it]


(0.7384382107657316, 0.538121546961326, 0.6225631192074145, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675, 0.24442881156527807, 0.22472536043515282, 0.2094918948317331, 0.2016692225422178, 0.19578908088188324, 0.1865974378491205]
Dev loss: 0.19174955063869084


HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…

Epoch:  12%|█▏        | 12/100 [13:11<1:34:14, 64.26s/it]


(0.7253989361702128, 0.6027624309392265, 0.6584188292094145, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675, 0.24442881156527807, 0.22472536043515282, 0.2094918948317331, 0.2016692225422178, 0.19578908088188324, 0.1865974378491205, 0.19174955063869084]
Dev loss: 0.18991672602437792


HBox(children=(IntProgress(value=0, description='Training iteration', max=63, style=ProgressStyle(description_…




HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


(0.6875372689326178, 0.6370165745856353, 0.661313449956983, None)
Loss history: [0.6759394501882886, 0.4195654704457238, 0.3296236906732832, 0.28321199402922675, 0.24442881156527807, 0.22472536043515282, 0.2094918948317331, 0.2016692225422178, 0.19578908088188324, 0.1865974378491205, 0.19174955063869084, 0.18991672602437792]
Dev loss: 0.19546820850126326
No improvement on development set. Finish training.





In [16]:
# Load a trained model that you have fine-tuned
model_state_dict = torch.load(output_model_file)
model = BertForMultiLabelSequenceClassification.from_pretrained(BERT_MODEL, state_dict=model_state_dict, num_labels = len(label2idx))
model.to(device)

model.eval()

test_dataloader = get_data_loader(test_data[:2000], MAX_SEQ_LENGTH, TRAIN_BATCH_SIZE)

evaluate(model, train_dataloader, full_report=True, target_names=target_names)
evaluate(model, dev_dataloader, full_report=True, target_names=target_names)
evaluate(model, test_dataloader, full_report=True, target_names=target_names)

01/09/2019 10:21:35 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased.tar.gz from cache at /home/ubuntu/.pytorch_pretrained_bert/437da855f7aeb6dcc47ee03b11ac55bfbc069d31354f6867f3b298aad8429925.dd2dce7e7331017693bd2230dbc8015b12a975201a420a856a6efbf7ae9d84c5
01/09/2019 10:21:35 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /home/ubuntu/.pytorch_pretrained_bert/437da855f7aeb6dcc47ee03b11ac55bfbc069d31354f6867f3b298aad8429925.dd2dce7e7331017693bd2230dbc8015b12a975201a420a856a6efbf7ae9d84c5 to temp dir /tmp/tmp8trgmjfr
01/09/2019 10:21:41 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hi

HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


                                           precision    recall  f1-score   support

                                   health       0.86      0.35      0.49        52
                                    sport       1.00      0.93      0.96       254
                              environment       1.00      0.03      0.05        36
                                  weather       0.00      0.00      0.00        17
disaster, accident and emergency incident       0.97      0.87      0.92        69
                    lifestyle and leisure       0.85      0.61      0.71       100
                                  society       0.92      0.10      0.18       117
                   science and technology       0.00      0.00      0.00        14
   arts, culture, entertainment and media       0.98      0.88      0.93       217
                                   labour       0.00      0.00      0.00        30
                      religion and belief       0.00      0.00      0.00        22
  

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


                                           precision    recall  f1-score   support

                                   health       0.84      0.21      0.34        75
                                    sport       0.95      0.84      0.89       254
                              environment       1.00      0.03      0.07        29
                                  weather       0.00      0.00      0.00        15
disaster, accident and emergency incident       0.86      0.57      0.69        63
                    lifestyle and leisure       0.49      0.35      0.41       118
                                  society       1.00      0.08      0.14       130
                   science and technology       0.00      0.00      0.00        16
   arts, culture, entertainment and media       0.81      0.64      0.72       202
                                   labour       0.00      0.00      0.00        11
                      religion and belief       0.00      0.00      0.00        19
  

HBox(children=(IntProgress(value=0, description='Evaluation iteration', max=63, style=ProgressStyle(descriptio…


                                           precision    recall  f1-score   support

                                   health       0.55      0.14      0.22        44
                                    sport       0.95      0.86      0.90       263
                              environment       0.00      0.00      0.00        27
                                  weather       0.00      0.00      0.00        17
disaster, accident and emergency incident       0.87      0.59      0.70        68
                    lifestyle and leisure       0.63      0.51      0.57       103
                                  society       0.67      0.02      0.04        97
                   science and technology       0.00      0.00      0.00        14
   arts, culture, entertainment and media       0.81      0.67      0.73       186
                                   labour       0.00      0.00      0.00        25
                      religion and belief       0.00      0.00      0.00        21
  

0.1664357990736053