### Base BERT with second-to-last pooling for word representations; a CRF computes the sentence score, and training minimizes the negative log-likelihood

In [1]:
import torch
from utils.bert_model import BERT_CRF
from utils.utils import prepare, predict, train, evaluate

# Configuration for the BERT + CRF experiment. The same dict is passed to
# prepare(), the model constructor and train() in the cells below.
hparams = {
    # Input data file. An alternative data file is kept here for reference:
    # 'path':'/home/peitian_zhang/Data/NER/test.txt',
    # NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
    'path':'/home/peitian_zhang/Data/NER/labeled_train.txt',
    'epochs': 10,
    'batch_size': 40,
    # presumably the hidden size of the BERT encoder; confirm against the model code
    'embedding_dim': 768,
    # Keep the originally configured second GPU when it exists, but fall back
    # to the first GPU or the CPU so that `.to(hparams['device'])` does not
    # fail on machines with fewer than two CUDA devices.
    'device': (
        'cuda:1' if torch.cuda.device_count() > 1
        else 'cuda:0' if torch.cuda.is_available()
        else 'cpu'
    ),
    # name of the pretrained BERT checkpoint to load
    'bert':'bert-base-chinese',
    # presumably the maximum token length per example; TODO confirm in prepare()
    'seq_length':256,
}

In [2]:
# Run the project's prepare() helper on the config. It returns attr_dict (whose
# 'tag2idx' entry is passed to the model constructor below) and loaders, an
# indexable collection (presumably the data loaders for the different splits;
# TODO confirm which index is which).
attr_dict, loaders = prepare(hparams)

In [3]:
# Build the BERT_CRF model from the config and the tag2idx entry of attr_dict,
# then move it to the configured device.
bert_model = BERT_CRF(hparams,attr_dict['tag2idx']).to(hparams['device'])

In [4]:
# Train the model; the value returned by train() is rebound to bert_model.
# The progress lines and F1 dictionaries below are the output of this call.
bert_model = train(hparams, bert_model, loaders)

epoch 1 , step 21 , loss: 608.7438: : 22it [00:18,  1.19it/s]
{'weighted_f1': 0.5574, 'micro_f1': 0.6853, 'macro_f1': 0.0407}
epoch 2 , step 21 , loss: 306.9480: : 22it [00:17,  1.23it/s]
{'weighted_f1': 0.5574, 'micro_f1': 0.6853, 'macro_f1': 0.0407}
epoch 3 , step 21 , loss: 298.9526: : 22it [00:17,  1.24it/s]
{'weighted_f1': 0.5574, 'micro_f1': 0.6853, 'macro_f1': 0.0407}
epoch 4 , step 21 , loss: 296.3358: : 22it [00:17,  1.24it/s]
{'weighted_f1': 0.5574, 'micro_f1': 0.6853, 'macro_f1': 0.0407}
epoch 5 , step 21 , loss: 291.2973: : 22it [00:17,  1.25it/s]
{'weighted_f1': 0.5574, 'micro_f1': 0.6853, 'macro_f1': 0.0407}
epoch 6 , step 21 , loss: 285.9999: : 22it [00:17,  1.25it/s]
{'weighted_f1': 0.5574, 'micro_f1': 0.6853, 'macro_f1': 0.0407}
epoch 7 , step 21 , loss: 283.4419: : 22it [00:17,  1.25it/s]
{'weighted_f1': 0.5574, 'micro_f1': 0.6853, 'macro_f1': 0.0407}
epoch 8 , step 21 , loss: 278.8362: : 22it [00:17,  1.24it/s]
{'weighted_f1': 0.5574, 'micro_f1': 0.6853, 'macro_f1': 

In [None]:
# Evaluate the trained model on loaders[0].
# NOTE(review): the second experiment evaluates on loaders[1] instead; confirm
# which index holds the held-out split so the reported scores are comparable.
evaluate(bert_model, loaders[0])

### Built-in BertForTokenClassification with labels passed as input: the output logits are normalized for classification, and the model's returned loss is used for training

In [None]:
import torch
from utils.bert_model import BERT_BASE
from utils.utils import prepare, predict, train, evaluate

# Configuration for the BertForTokenClassification experiment. The same dict is
# passed to prepare(), the model constructor and train() in the cells below.
hparams = {
    # Input data file. An alternative data file is kept here for reference:
    # 'path':'/home/peitian_zhang/Data/NER/labeled_train.txt',
    # NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
    'path':'/home/peitian_zhang/Data/NER/test.txt',
    'epochs': 10,
    'batch_size': 30,
    # presumably the hidden size of the BERT encoder; confirm against the model code
    'embedding_dim': 768,
    # Use the first GPU when CUDA is available; otherwise fall back to the CPU
    # so that `.to(hparams['device'])` does not fail on CPU-only machines.
    'device': 'cuda:0' if torch.cuda.is_available() else 'cpu',
    # Name of the pretrained BERT checkpoint to load.
    # NOTE(review): this is an English uncased checkpoint, whereas the first
    # experiment uses 'bert-base-chinese'; confirm it matches the data language.
    'bert':'bert-base-uncased',
    # presumably the maximum token length per example; TODO confirm in prepare()
    'seq_length':25,
}

# Run the project's prepare() helper on the config. It returns attr_dict (whose
# 'tag2idx' entry is passed to the model constructor below) and loaders, an
# indexable collection (presumably the data loaders for the different splits;
# TODO confirm which index is which).
attr_dict, loaders = prepare(hparams)

In [None]:
# Build the BERT_BASE model from the config and the tag2idx entry of attr_dict,
# then move it to the configured device.
bert_model = BERT_BASE(hparams, attr_dict['tag2idx']).to(hparams['device'])

In [None]:
# Train the model; the value returned by train() is rebound to bert_model.
bert_model = train(hparams, bert_model, loaders)

In [None]:
# Evaluate the trained model on loaders[1].
# NOTE(review): the other two experiments evaluate on loaders[0]; confirm which
# index holds the held-out split so the reported scores are comparable.
evaluate(bert_model, loaders[1])

In [None]:
# Display the first item yielded by loaders[0].
# NOTE(review): this looks like a leftover inspection/debugging cell; consider
# removing it or showing only a small summary of the batch.
next(iter(loaders[0]))

### NER-pretrained BERT with last-hidden-state pooling: the 768-dimensional hidden states are mapped directly to the tag-set space, and training minimizes the negative log-likelihood for classification

In [6]:
import torch
from utils.bert_model import BERT_NER
from utils.utils import prepare, predict, train, evaluate

# Configuration for the NER-pretrained BERT experiment. The same dict is passed
# to prepare(), the model constructor and train() in the cells below.
hparams = {
    # Input data file. An alternative data file is kept here for reference:
    # 'path':'/home/peitian_zhang/Data/NER/labeled_train.txt',
    # NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
    'path':'/home/peitian_zhang/Data/NER/test.txt',
    'epochs': 25,
    'batch_size': 30,
    # matches the 768-dimensional hidden states named in the section heading
    'embedding_dim': 768,
    # Use the first GPU when CUDA is available; otherwise fall back to the CPU
    # so that `.to(hparams['device'])` does not fail on CPU-only machines.
    'device': 'cuda:0' if torch.cuda.is_available() else 'cpu',
    # Name of the NER-pretrained checkpoint to load.
    # Alternative checkpoint noted by the author: ckiplab/bert-base-chinese-ner
    'bert':'dslim/bert-base-NER',
    # presumably the maximum token length per example; TODO confirm in prepare()
    'seq_length':30,
}
# Run the project's prepare() helper on the config. It returns attr_dict (whose
# 'tag2idx' entry is passed to the model constructor below) and loaders, an
# indexable collection (presumably the data loaders for the different splits;
# TODO confirm which index is which).
attr_dict, loaders = prepare(hparams)

In [7]:
# Build the BERT_NER model from the config and the tag2idx entry of attr_dict,
# then move it to the configured device.
bert_model = BERT_NER(hparams, attr_dict['tag2idx']).to(hparams['device'])

In [8]:
# Train the model; the value returned by train() is rebound to bert_model.
# The progress lines and F1 dictionaries below are the output of this call.
bert_model = train(hparams, bert_model, loaders)

epoch 1 , step 0 , loss: 97.4674: : 1it [00:00,  3.98it/s]
{'weighted_f1': 0.1263, 'micro_f1': 0.2667, 'macro_f1': 0.0351}
epoch 2 , step 0 , loss: 73.6373: : 1it [00:00,  4.01it/s]
{'weighted_f1': 0.1153, 'micro_f1': 0.2667, 'macro_f1': 0.0393}
epoch 3 , step 0 , loss: 118.0095: : 1it [00:00,  4.32it/s]
{'weighted_f1': 0.1385, 'micro_f1': 0.3, 'macro_f1': 0.042}
epoch 4 , step 0 , loss: 64.1380: : 1it [00:00,  4.17it/s]
{'weighted_f1': 0.0622, 'micro_f1': 0.0667, 'macro_f1': 0.0242}
epoch 5 , step 0 , loss: 85.1973: : 1it [00:00,  4.27it/s]
{'weighted_f1': 0.1385, 'micro_f1': 0.3, 'macro_f1': 0.042}
epoch 6 , step 0 , loss: 45.5129: : 1it [00:00,  4.50it/s]
{'weighted_f1': 0.1385, 'micro_f1': 0.3, 'macro_f1': 0.042}
epoch 7 , step 0 , loss: 32.6179: : 1it [00:00,  3.99it/s]
{'weighted_f1': 0.1385, 'micro_f1': 0.3, 'macro_f1': 0.042}
epoch 8 , step 0 , loss: 34.8444: : 1it [00:00,  3.88it/s]
{'weighted_f1': 0.1123, 'micro_f1': 0.2667, 'macro_f1': 0.0383}
epoch 9 , step 0 , loss: 60.173

In [10]:
# Evaluate the trained model on loaders[0]; the per-class report and the F1
# summary below are the output of this call.
# NOTE(review): the second experiment evaluates on loaders[1] instead; confirm
# which index holds the held-out split so the reported scores are comparable.
evaluate(bert_model, loaders[0])

              precision    recall  f1-score   support

           2       0.70      1.00      0.82        21
           3       0.00      0.00      0.00         5
          12       0.00      0.00      0.00         1
          13       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1

    accuracy                           0.70        30
   macro avg       0.12      0.17      0.14        30
weighted avg       0.49      0.70      0.58        30



{'weighted_f1': 0.5765, 'micro_f1': 0.7, 'macro_f1': 0.1373}