In [20]:
from deeppavlov.core.common.chainer import Chainer

class Component:
    """Pair a pipeline object with the input and output names it is wired to.

    The three constructor arguments are stored as-is on the instance:

    * ``component`` -- the wrapped pipeline object,
    * ``inp``       -- the names of the values it consumes,
    * ``out``       -- the names under which its results are published.
    """

    def __init__(self, component, inp, out):
        # Plain references are kept; nothing is copied or validated here.
        self.component, self.inp, self.out = component, inp, out


class Model:
    """Callable wrapper around a ``Chainer`` built from a list of step specs.

    Each element of ``chainer`` must expose ``component``, ``inp`` and ``out``
    attributes; the steps are appended to the underlying ``Chainer`` in the
    order given.
    """

    def __init__(self, inp, out, chainer):
        """Create the underlying ``Chainer`` and register every step on it.

        Args:
            inp: forwarded as the first positional argument of ``Chainer``.
            out: forwarded as the second positional argument of ``Chainer``.
            chainer: iterable of step specs (objects with ``component``,
                ``inp`` and ``out`` attributes).
        """
        # Third positional argument is deliberately None
        # (presumably Chainer's ``in_y`` -- confirm against the Chainer API).
        self.model = Chainer(inp, out, None)
        pipeline = self.model
        for step in chainer:
            # The trailing ``None, False`` are passed positionally
            # (presumably ``in_y`` and ``main`` -- confirm against Chainer.append).
            pipeline.append(step.component, step.inp, step.out, None, False)

    def __call__(self, *args, **kwargs):
        """Forward all arguments to the wrapped chainer and return its result."""
        return self.model(*args, **kwargs)



In [21]:
from deeppavlov.models.preprocessors.bert_preprocessor import BertNerPreprocessor
from deeppavlov.core.data.simple_vocab import SimpleVocabulary
from deeppavlov.models.bert.bert_sequence_tagger import BertSequenceTagger

# --- Step 1: tokenisation / sub-word preprocessing ---------------------------
# Uses the vocabulary shipped in the "rubert_cased" download directory; casing
# is kept (do_lower_case=False) and token masking is disabled (prob 0.0).
preprocessor = BertNerPreprocessor(
    token_masking_prob=0.0,
    max_subword_length=15,
    max_seq_length=512,
    do_lower_case=False,
    vocab_file="~/.deeppavlov/downloads/bert_models/rubert_cased_L-12_H-768_A-12_v1/vocab.txt",
)

# --- Step 2: tag vocabulary ---------------------------------------------------
# Loaded from and saved to the same tag.dict file; "O" is the unknown-tag token.
tag_vocab = SimpleVocabulary(
    load_path="~/.deeppavlov/models/ner_rus_bert/tag.dict",
    save_path="~/.deeppavlov/models/ner_rus_bert/tag.dict",
    fit_on=["y"],
    unk_token=["O"],
    pad_with_zeros=True,
)

# --- Step 3: BERT sequence tagger ---------------------------------------------
# The number of output tags is taken from the vocabulary built above, so this
# statement must stay after the ``tag_vocab`` construction.
seq_tagger = BertSequenceTagger(
    n_tags=tag_vocab.len,
    # pretrained encoder files
    bert_config_file="~/.deeppavlov/downloads/bert_models/rubert_cased_L-12_H-768_A-12_v1/bert_config.json",
    pretrained_bert="~/.deeppavlov/downloads/bert_models/rubert_cased_L-12_H-768_A-12_v1/bert_model.ckpt",
    encoder_layer_ids=[-1],
    # head / output behaviour
    use_crf=True,
    return_probas=False,
    # regularisation
    keep_prob=0.1,
    attention_probs_keep_prob=0.5,
    ema_decay=0.9,
    clip_norm=None,
    # optimisation schedule
    optimizer="tf.train:AdamOptimizer",
    learning_rate=1e-3,
    bert_learning_rate=2e-5,
    min_learning_rate=1e-7,
    learning_rate_drop_patience=30,
    learning_rate_drop_div=1.5,
    load_before_drop=True,
    # checkpoint location (same path used for loading and saving)
    load_path="~/.deeppavlov/models/ner_rus_bert/model",
    save_path="~/.deeppavlov/models/ner_rus_bert/model",
)


# --- Assemble the pipeline ----------------------------------------------------
# Takes raw text "x"; returns the tokens and the predicted tag strings.
model = Model(
    chainer=[
        Component(
            preprocessor,
            ["x"],
            ["x_tokens", "x_subword_tokens", "x_subword_tok_ids",
             "startofword_markers", "attention_mask"],
        ),
        # NOTE(review): "y" is not among the model inputs declared below;
        # presumably this step only matters when labels are supplied
        # (training) -- confirm against Chainer semantics.
        Component(tag_vocab, ["y"], ["y_ind"]),
        Component(
            seq_tagger,
            ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
            ["y_pred_ind"],
        ),
        # Same vocabulary object reused to map predicted indices to tag strings.
        Component(tag_vocab, ["y_pred_ind"], ["y_pred"]),
    ],
    inp=["x"],
    out=["x_tokens", "y_pred"],
)

# Smoke test on a single sentence; kept as the last expression so the
# notebook displays the result.
model(['Москва - столица России'])

2021-05-18 18:00:04.834 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 115: [loading vocabulary from /data/dp_components/models/ner_rus_bert/tag.dict]
2021-05-18 18:00:18.84 INFO in 'deeppavlov.core.models.tf_model'['tf_model'] at line 51: [loading model from /data/dp_components/models/ner_rus_bert/model]


INFO:tensorflow:Restoring parameters from /data/dp_components/models/ner_rus_bert/model


[[['Москва', '-', 'столица', 'России']], [['B-LOC', 'O', 'O', 'B-LOC']]]