### Base BERT with second-to-last pooling for word representations, followed by a CRF that computes the sentence score; trained by minimizing the negative log-likelihood

In [None]:
import torch
from utils.bert_model import BERT_CRF
from utils.utils import prepare, predict, train, evaluate

# Settings for the BERT + CRF experiment. The same dict is handed to
# prepare(), BERT_CRF() and train() in the cells below.
hparams = {
    # Alternative (English test data): 'path': 'data/data_test_en.txt',
    'path': 'data/data.txt',
    'epochs': 100,
    'batch_size': 32,
    'embedding_dim': 768,
    'device': 'cuda:1',  # the model is moved here via .to(hparams['device'])
    'bert': 'bert-base-chinese',
    'seq_length': 20,
    'learning_rate': 3e-5,  # forwarded to train() as lr
    'save_path': 'model_params/bert_base_model',  # target of torch.save below
}

# prepare() returns a pair: attr_dict (indexed below with 'tag2idx' and
# 'tokenizer') and loaders (indexed by position when evaluating).
# Presumably it reads the dataset at hparams['path'] — confirm in utils.utils.
attr_dict, loaders = prepare(hparams)

In [None]:
# Build the BERT + CRF model from the hyperparameters and the tag-to-index
# mapping, then move it to the configured device.
bert_model = BERT_CRF(
    hparams,
    attr_dict['tag2idx'],
).to(hparams['device'])

In [None]:
# train() returns a model, which replaces bert_model for the following cells.
# schedule=True is passed straight through (presumably it enables a
# learning-rate schedule — confirm in utils.utils.train).
bert_model = train(
    hparams,
    bert_model,
    loaders,
    lr=hparams['learning_rate'],
    schedule=True,
)

In [None]:
# Evaluate the trained model on the second loader returned by prepare().
# NOTE(review): presumably the held-out (dev/validation) split — confirm
# against utils.utils.prepare.
evaluate(bert_model, loaders[1])

In [None]:
# Persist only the model weights (state_dict) to hparams['save_path'].
# torch.save does not create missing parent directories, so ensure the
# checkpoint directory exists first; otherwise the save fails only after the
# whole training run has finished.
from pathlib import Path

Path(hparams['save_path']).parent.mkdir(parents=True, exist_ok=True)
torch.save(bert_model.state_dict(), hparams['save_path'])

In [None]:
# Uncomment to restore the weights saved at hparams['save_path'] instead of retraining:
# bert_model.load_state_dict(torch.load(hparams['save_path']))

In [None]:
# Run the trained model on two sample sentences, using the tokenizer that
# prepare() stored in attr_dict.
# NOTE(review): the final argument (50) is presumably a maximum sequence
# length; it differs from hparams['seq_length'] (20) — confirm this is intended.
predict(
    [
        '中国人民大学第三十五届一二九合唱音乐节如期举行，信息学院分团委文化部将组织信院全体同学参加',
        '张配天获得最佳论文奖',
    ],
    bert_model,
    attr_dict['tokenizer'],
    50,
)

### Built-in BertForTokenClassification with labels passed as input: the output logits are normalized for classification, and the returned loss is used for training

In [None]:
import torch
from utils.bert_model import BERT_BASE
from utils.utils import prepare, predict, train, evaluate

# Settings for the BertForTokenClassification experiment. The same dict is
# handed to prepare(), BERT_BASE() and train() in the cells below.
hparams = {
    # Alternative (English test data): 'path': 'data/data_test_en.txt',
    'path': 'data/data.txt',
    'epochs': 100,
    'batch_size': 40,
    'embedding_dim': 768,
    'device': 'cuda:1',  # the model is moved here via .to(hparams['device'])
    'bert': 'bert-base-chinese',
    'seq_length': 256,
    'learning_rate': 3e-5,  # forwarded to train() as lr
}

# prepare() returns a pair: attr_dict (indexed below with 'tag2idx') and
# loaders (indexed by position when evaluating).
# Presumably it reads the dataset at hparams['path'] — confirm in utils.utils.
attr_dict, loaders = prepare(hparams)

In [None]:
# Build the BERT_BASE model from the hyperparameters and the tag-to-index
# mapping, then move it to the configured device.
bert_model = BERT_BASE(
    hparams,
    attr_dict['tag2idx'],
).to(hparams['device'])

In [None]:
# train() returns a model, which replaces bert_model for the following cells.
# schedule=True is passed straight through (presumably it enables a
# learning-rate schedule — confirm in utils.utils.train).
bert_model = train(
    hparams,
    bert_model,
    loaders,
    lr=hparams['learning_rate'],
    schedule=True,
)

In [None]:
# Evaluate the trained model on the second loader returned by prepare().
# NOTE(review): presumably the held-out (dev/validation) split — confirm
# against utils.utils.prepare.
evaluate(bert_model, loaders[1])

### NER-pretrained BERT with last-hidden-state pooling: the 768-dimensional hidden states are mapped directly to the tagset space, and the negative log-likelihood is minimized for classification

In [None]:
import torch
from utils.bert_model import BERT_NER
from utils.utils import prepare, predict, train, evaluate

# Settings for the NER-pretrained BERT experiment. The same dict is handed to
# prepare(), BERT_NER() and train() in the cells below.
hparams = {
    # Alternative (English test data): 'path': 'data/data_test_en.txt',
    'path': 'data/data.txt',
    'epochs': 100,
    'batch_size': 32,
    'embedding_dim': 768,
    'device': 'cuda:1',  # the model is moved here via .to(hparams['device'])
    # Alternative checkpoint: 'bert': 'dslim/bert-base-NER',
    'bert': 'ckiplab/bert-base-chinese-ner',
    'seq_length': 256,
    'learning_rate': 3e-5,  # forwarded to train() as lr
}
# prepare() returns a pair: attr_dict (indexed below with 'tag2idx') and
# loaders (indexed by position when evaluating).
# Presumably it reads the dataset at hparams['path'] — confirm in utils.utils.
attr_dict, loaders = prepare(hparams)

In [None]:
# Build the BERT_NER model from the hyperparameters and the tag-to-index
# mapping, then move it to the configured device.
bert_model = BERT_NER(
    hparams,
    attr_dict['tag2idx'],
).to(hparams['device'])

In [None]:
# train() returns a model, which replaces bert_model for the following cells.
# schedule=True is passed straight through (presumably it enables a
# learning-rate schedule — confirm in utils.utils.train).
bert_model = train(
    hparams,
    bert_model,
    loaders,
    lr=hparams['learning_rate'],
    schedule=True,
)

In [None]:
# Evaluate on loaders[1], the same loader the other two experiments in this
# notebook evaluate on, so that the three results are directly comparable
# (this cell previously used loaders[0]).
# NOTE(review): presumably loaders[1] is the held-out split — confirm against
# utils.utils.prepare.
evaluate(bert_model, loaders[1])