## 3.2 Predict using Python pipeline

Alternatively, you can use a Python way to describe and build your model for prediction, without using the config file.

In [1]:
from pathlib import Path

from deeppavlov import Element, Model
from deeppavlov.core.data.simple_vocab import SimpleVocabulary
from deeppavlov.download import download_resource
from deeppavlov.models.classifiers.proba2labels import Proba2Labels
from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersNerPreprocessor
from deeppavlov.models.torch_bert.torch_transformers_sequence_tagger import TorchTransformersSequenceTagger


transformer = "bert-base-cased"
model_path = Path('./ner_ontonotes_bert_torch/' + transformer)

download_resource(
    'http://files.deeppavlov.ai/v1/ner/ner_ontonotes_bert_torch.tar.gz',
    {'./ner_ontonotes_bert_torch'}
)

preprocessor = TorchTransformersNerPreprocessor(
    vocab_file=transformer,
    do_lower_case=False,
    max_seq_length=512,
    max_subword_length=15,
    token_masking_prob=0.0,
)
 
classes_vocab = SimpleVocabulary(
    save_path=model_path/'tag.dict',
    load_path=model_path/'tag.dict',
    pad_with_zeros=True,
    unk_token=["O"]
)

tagger = TorchTransformersSequenceTagger(
    n_tags=classes_vocab.len,
    return_probas=False,
    use_crf=True,
    attention_probs_keep_prob=0.5,
    encoder_layer_ids=[-1],
    pretrained_bert='bert-base-cased',
    save_path=model_path/'model',
    load_path=model_path/'model',
    optimizer='AdamW',
    optimizer_parameters={'lr': 2e-05, 
                          "weight_decay": 1e-06, 
                          "betas": [0.9, 0.999],
                          "eps": 1e-06},
    clip_norm=1.0,
    min_learning_rate=1e-07,
    learning_rate_drop_patience=30,
    learning_rate_drop_div=1.5,
    load_before_drop=True,
)

ner_model = Model(
    x=['x'],
    out=["x_tokens", "y_pred"],
    pipe=[
        Element(component=preprocessor, 
                x=['x'], 
                out=["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask"]),
        
        Element(component=classes_vocab, 
                x=["y"], 
                out=["y_ind"]),
        
        Element(component=tagger, 
                x=["x_subword_tok_ids", "attention_mask", "startofword_markers"], 
                out=["y_pred_ind"]),
        
        Element(component=classes_vocab, 
                x=["y_pred_ind"], 
                out=["y_pred"])
    ]
)

In [None]:
ner_model(['Bob Ross lived in Florida', 'Elon Musk founded Tesla'])

[[['Bob', 'Ross', 'lived', 'in', 'Florida'],
  ['Elon', 'Musk', 'founded', 'Tesla']],
 [['B-PERSON', 'I-PERSON', 'O', 'O', 'B-GPE'],
  ['B-PERSON', 'I-PERSON', 'O', 'B-ORG']]]

## 3.2 Predict using Python pipeline

Alternatively, you can use a Python way to describe and build your model for prediction, without using the config file.

In [None]:
from pathlib import Path

from deeppavlov import Element, Model
from deeppavlov.download import download_resource
from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchSquadTransformersPreprocessor
from deeppavlov.models.torch_bert.torch_transformers_sequence_tagger import TorchTransformersSequenceTagger
from deeppavlov.models.preprocessors.squad_preprocessor import SquadBertAnsPreprocessor, SquadBertMappingPreprocessor, SquadBertAnsPostprocessor
from deeppavlov.models.torch_bert.torch_transformers_squad import TorchTransformersSquad


transformer = 'bert-base-uncased'
model_path = Path('./squad_torch_bert')
lowercase = True 

download_resource(
    'http://files.deeppavlov.ai/v1/squad/squad_torch_bert.tar.gz',
    {'./squad_torch_bert'}
)

in_preprocessor = TorchSquadTransformersPreprocessor(
    vocab_file=transformer,
    do_lower_case=lowercase,
    max_seq_length=384,
    return_tokens=True
)
 
mapping = SquadBertMappingPreprocessor(
    do_lower_case=lowercase
)

transformer = TorchTransformersSquad(
    pretrained_bert=transformer,
    save_path=model_path/'model',
    load_path=model_path/'model',
    optimizer='AdamW',
    optimizer_parameters={
        'lr': 2e-05,
        'weight_decay': 0.01,
        'betas': [
            0.9,
            0.999
        ],
        'eps': 1e-06
    },
    learning_rate_drop_patience=2,
    learning_rate_drop_div=2.0,
)

ans_postprocessor = SquadBertAnsPostprocessor()

model = Model(
    x=['context_raw', 'question_raw'],
    out=['ans_predicted', 'ans_start_predicted',
      "logits"],
    pipe=[
        Element(component=in_preprocessor, 
                x=['context_raw', 'question_raw'], 
                out=['bert_features', 'subtokens']),
          
        Element(component=mapping, 
                x=['context_raw', 'bert_features', 'subtokens'], 
                out=['subtok2chars', 'char2subtoks']),
          
        Element(component=transformer, 
                x=['bert_features'], 
                out=['ans_start_predicted', 'ans_end_predicted', 'logits']),
        
        Element(component=ans_postprocessor, 
                x=['ans_start_predicted', 'ans_end_predicted', 'context_raw', 'bert_features', 'subtok2chars', 'subtokens'], 
                out=['ans_predicted', 'ans_start_predicted', 'ans_end_predicted'])
    ]
)

In [None]:
model(['DeepPavlov is library for NLP and dialog systems.'], ['What is DeepPavlov?'])

[['library for NLP and dialog systems'], [14], [348392.0]]