In [2]:
from pathlib import Path

current_dir = Path.cwd()
model_checkpoint_path = current_dir.parent / 'data' / "models" / "checkpoints" / "checkpoint-2500/"

In [3]:
current_dir.parent, model_checkpoint_path

(WindowsPath('C:/projects/sequence-labeling'),
 WindowsPath('C:/projects/sequence-labeling/data/models/checkpoints/checkpoint-2500'))

In [4]:
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModelForTokenClassification

model_checkpoint = str(model_checkpoint_path)
# model_checkpoint = '../data/models/checkpoints/checkpoint-2500/'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)

ner_pipeline = pipeline('token-classification', model=model, tokenizer=tokenizer,  aggregation_strategy="simple")

In [6]:
ner_pipeline

<transformers.pipelines.token_classification.TokenClassificationPipeline at 0x278c1fa21a0>

In [7]:
%%time
ner_pipeline([
    "Germany's representative to the European Union's veterinary committee Werner Zwingmann said on Wednesday consumers should buy sheepmeat from countries other than Britain until the scientific advice was clearer .",
"Elon Musk on Monday sent a letter to Twitter proposing to follow through with his deal to buy the company at the originally agreed upon price of $54.20 per share, according to a securities filing on Tuesday.",
"In the letter, Musk said he would proceed with the acquisition on the original terms, pending receipt of the debt financing for the deal and provided that the Delaware Chancery Court stay the litigation proceedings over Musk’s initial attempt to pull out of the deal and adjourn the upcoming trial over the dispute.",
"A Twitter spokesperson said in a statement to CNN that the company received Musk’s letter and reiterated its previous statement that the “intention of the Company is to close the transaction at $54.20 per share.”",
"Musk on Tuesday night tweeted: “Buying Twitter is an accelerant to creating X, the everything app.”",
"News of the letter was first reported by Bloomberg earlier on Tuesday.",
"Twitter (TWTR) stock was halted twice, the second time for news pending. After the stock resumed trading, it was up more than 20%, topping $51 a share and approaching the agreed upon deal price for the first time in months.",
"The news comes as the the two sides have been preparing to head to trial in two weeks over Musk’s attempt to terminate of the $44 billion acquisition agreement, which Twitter had sued him to complete.",
" Twitter CEO Parag Agrawal had been set to be deposed by Musk’s lawyers on Monday, and Twitter’s lawyers had planned to depose Musk starting on Thursday."
])

CPU times: total: 93.8 ms
Wall time: 357 ms


[[{'entity_group': 'LOC',
   'score': 0.99837816,
   'word': 'germany',
   'start': 0,
   'end': 7},
  {'entity_group': 'ORG',
   'score': 0.98408186,
   'word': 'european union',
   'start': 32,
   'end': 46},
  {'entity_group': 'PER',
   'score': 0.9977307,
   'word': 'werner zwingmann',
   'start': 70,
   'end': 86},
  {'entity_group': 'LOC',
   'score': 0.997421,
   'word': 'britain',
   'start': 162,
   'end': 169}],
 [{'entity_group': 'PER',
   'score': 0.9801107,
   'word': 'el',
   'start': 0,
   'end': 2},
  {'entity_group': 'PER',
   'score': 0.9715147,
   'word': '##on musk',
   'start': 2,
   'end': 9},
  {'entity_group': 'ORG',
   'score': 0.9206422,
   'word': 'twitter',
   'start': 37,
   'end': 44}],
 [{'entity_group': 'PER',
   'score': 0.99599755,
   'word': 'mu',
   'start': 15,
   'end': 17},
  {'entity_group': 'PER',
   'score': 0.9959708,
   'word': '##sk',
   'start': 17,
   'end': 19},
  {'entity_group': 'ORG',
   'score': 0.96769327,
   'word': 'delaware chance

# Save bento model

In [8]:
import bentoml

bentoml.transformers.save_model(name="ner-conll-base", pipeline=ner_pipeline)

No versions of Flax or Jax are found under the current machine. In order to use Flax with transformers 4.x and above, refers to https://github.com/google/flax#quick-install


Model(tag="ner-conll-base:vcktii2eu6etyaav", path="C:\Users\artif\bentoml\models\ner-conll-base\vcktii2eu6etyaav\")

In [22]:
!bentoml models list

 Tag                     Module                Size        Creation Time       
 ner-conll-base:vcktiiвЂ¦  bentoml.transformers  254.11 MiB  2022-10-05 15:17:19 
 ner:hid2jpsejoublpax    bentoml.transformers  254.11 MiB  2022-10-05 04:15:40 


In [10]:
!bentoml models get ner-conll-base:latest

name: ner-conll-base                                                           
version: vcktii2eu6etyaav                                                      
module: bentoml.transformers                                                   
labels: {}                                                                     
options:                                                                       
  task: token-classification                                                   
  tf: []                                                                       
  pt: []                                                                       
  default: {}                                                                  
  type: null                                                                   
  kwargs: {}                                                                   
metadata: {}                                                                   
context:                                

# Verify model

In [21]:
from transformers.pipelines.token_classification import AggregationStrategy
loaded_model = bentoml.transformers.load_model("ner-conll-base:latest")

loaded_model([
    "News of the letter was first reported by Bloomberg earlier on Tuesday.",
     "Germany's representative to the European Union's veterinary committee Werner Zwingmann said on Wednesday consumers should buy sheepmeat from countries other than Britain until the scientific advice was clearer.",
], aggregation_strategy=AggregationStrategy.SIMPLE)

No versions of Flax or Jax are found under the current machine. In order to use Flax with transformers 4.x and above, refers to https://github.com/google/flax#quick-install
loading configuration file C:\Users\artif\bentoml\models\ner-conll-base\vcktii2eu6etyaav\config.json
Model config DistilBertConfig {
  "_name_or_path": "C:\\Users\\artif\\bentoml\\models\\ner-conll-base\\vcktii2eu6etyaav",
  "activation": "gelu",
  "architectures": [
    "DistilBertForTokenClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "O",
    "1": "B-PER",
    "2": "I-PER",
    "3": "B-ORG",
    "4": "I-ORG",
    "5": "B-LOC",
    "6": "I-LOC",
    "7": "B-MISC",
    "8": "I-MISC"
  },
  "initializer_range": 0.02,
  "label2id": {
    "B-LOC": 5,
    "B-MISC": 7,
    "B-ORG": 3,
    "B-PER": 1,
    "I-LOC": 6,
    "I-MISC": 8,
    "I-ORG": 4,
    "I-PER": 2,
    "O": 0
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert"

[[{'entity_group': 'ORG',
   'score': 0.989792,
   'word': 'bloomberg',
   'start': 41,
   'end': 50}],
 [{'entity_group': 'LOC',
   'score': 0.99837816,
   'word': 'germany',
   'start': 0,
   'end': 7},
  {'entity_group': 'ORG',
   'score': 0.98408186,
   'word': 'european union',
   'start': 32,
   'end': 46},
  {'entity_group': 'PER',
   'score': 0.9977307,
   'word': 'werner zwingmann',
   'start': 70,
   'end': 86},
  {'entity_group': 'LOC',
   'score': 0.997421,
   'word': 'britain',
   'start': 162,
   'end': 169}]]

# Serving the model

In [23]:
%%writefile ner_service.py
import bentoml
from transformers.pipelines.token_classification import AggregationStrategy

from bentoml.io import Text, JSON

runner = bentoml.transformers.get("ner-conll-base:latest").to_runner()

svc = bentoml.Service("ner_service", runners=[runner])

@svc.api(input=Text(), output=JSON())
async def extract_ne(input_series: str) -> list:
    return await runner.async_run(input_series, aggregation_strategy=AggregationStrategy.SIMPLE)

Writing ner_service.py


In [24]:
!bentoml serve ner_service.py:svc --reload

^C


## Build Bento for deployment

In [25]:
%%writefile bentofile.yaml
service: "ner_service.py:svc"
labels:
  owner: haystac
  project: haystac-ner
include:
- "*.py"
python:
  packages:
  - transformers
  - torch


Writing bentofile.yaml
