In [1]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# show NER results
from spacy import displacy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Single source of truth for the checkpoint, so the tokenizer and the model
# are always loaded from the same place.
MODEL_NAME = "dslim/bert-base-NER"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

# Token-classification ("ner") pipeline built from the objects loaded above.
pipe = pipeline("ner", model=model, tokenizer=tokenizer)

Downloading (…)okenizer_config.json: 100%|██████████| 59.0/59.0 [00:00<00:00, 40.1kB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 829/829 [00:00<00:00, 323kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 213k/213k [00:00<00:00, 526kB/s]
Downloading (…)in/added_tokens.json: 100%|██████████| 2.00/2.00 [00:00<00:00, 918B/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 42.6kB/s]
Downloading pytorch_model.bin: 100%|██████████| 433M/433M [02:01<00:00, 3.58MB/s] 


In [3]:
# Sample sentence used to smoke-test the NER pipeline.
text = "Hello! I'm Max. I like manga and fried chicken."

# Run the pipeline and print its raw output: a list with one dict per tagged
# token, carrying the BIO tag ("entity"), a score and character offsets.
ner_results = pipe(text)
print(ner_results)

[{'entity': 'B-PER', 'score': 0.9948801, 'index': 6, 'word': 'Max', 'start': 11, 'end': 14}]


In [4]:
def from_ner_results_to_displacy(text, ner_results):
  """Convert token-level NER predictions into displaCy's manual "ent" input.

  Consecutive tokens are merged into a single entity span following the BIO
  scheme: a ``B-`` tag always opens a new span, and an ``I-`` tag extends the
  open span only when it carries the same label and nothing but whitespace
  separates it from the span so far. An ``I-`` tag that cannot extend the open
  span (no span open yet, different label, or other text in between) opens a
  new span of its own instead of raising or producing an over-long span.

  Args:
    text: The string the predictions refer to.
    ner_results: Iterable of dicts, in document order, each with the keys
      ``"entity"`` (a tag such as ``"B-PER"`` or ``"I-PER"``), ``"start"`` and
      ``"end"`` (character offsets into ``text``). Tags that do not start
      with ``B-`` or ``I-`` are ignored.

  Returns:
    A dict ``{"text": text, "title": None, "ents": [...]}`` in which every
    element of ``ents`` is ``{"label": ..., "start": ..., "end": ...}``.
  """
  ents = []
  current_entity = None
  for ent in ner_results:
    tag = ent["entity"]
    prefix, label = tag[:2], tag[2:]
    if prefix not in ("B-", "I-"):
      # Not an entity token (e.g. "O"): nothing to record.
      continue

    continues_current = (
      prefix == "I-"
      and current_entity is not None
      and current_entity["label"] == label
      # Word pieces touch (empty gap) and words are separated by whitespace;
      # anything else in the gap means an untagged token sits in between.
      and not text[current_entity["end"]:ent["start"]].strip()
    )
    if continues_current:
      current_entity["end"] = ent["end"]
      continue

    # Either a "B-" tag or an "I-" tag with nothing to attach to: close the
    # span in progress (if any) and start a fresh one.
    if current_entity is not None:
      ents.append(current_entity)
    current_entity = {
      "label": label,
      "start": ent["start"],
      "end": ent["end"]
    }

  if current_entity is not None:
    ents.append(current_entity)
  return {"text": text, "title": None, "ents": ents}

In [6]:
# Re-declare the sample sentence and re-run the pipeline so this cell does not
# depend on variables left over from an earlier cell.
text = "Hello! I'm Max. I like manga and fried chicken."
ner_results = pipe(text)
# Convert the token-level predictions into the dict consumed by displacy.render.
d_displacy = from_ner_results_to_displacy(text, ner_results)
print(d_displacy)

{'text': "Hello! I'm Max. I like manga and fried chicken.", 'title': None, 'ents': [{'label': 'PER', 'start': 11, 'end': 14}]}


In [7]:
# manual=True: d_displacy is a pre-built dict (text / ents / title), not a spaCy Doc.
displacy.render(d_displacy, style="ent", manual=True)