# NAMED ENTITY RECOGNITION

In [1]:
# imports

from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

import torch

In [2]:
# Hugging Face Hub identifier of the pretrained checkpoint used for NER
model_id = 'dslim/bert-base-NER'

In [3]:
# Load the tokenizer published with the checkpoint named by `model_id`,
# so the text is split the same way the model expects.
tokenizer_ner = AutoTokenizer.from_pretrained(
    model_id,
)

In [4]:
# Load the token-classification (NER) model weights for the same checkpoint.
ner_model = AutoModelForTokenClassification.from_pretrained(
    model_id,
)

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
# Select the inference device, in order of preference:
#   1. the current CUDA device index, when CUDA is available;
#   2. Apple's Metal backend ('mps'), when this torch build exposes it and it is usable;
#   3. the CPU.
# The previous version only checked CUDA, so machines with an MPS device were
# labelled 'cpu'.
if torch.cuda.is_available():
    device = torch.cuda.current_device()
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    # getattr guard: older torch releases have no `torch.backends.mps` module.
    device = 'mps'
else:
    device = 'cpu'

In [6]:
# Build the NER inference pipeline from the model and tokenizer loaded above.
#
# - aggregation_strategy='max' makes the pipeline return grouped entities
#   (one 'entity_group' per span) instead of one prediction per sub-word token.
# - device=device uses the device selected in the setup cell. Previously
#   None was passed, which left that variable unused and let the pipeline
#   choose a device on its own.
nlp = pipeline(
    'ner',
    model=ner_model,
    tokenizer=tokenizer_ner,
    aggregation_strategy='max',
    device=device,
)

Device set to use mps:0


In [7]:
# Quick sanity check on a short sentence that contains one person name.
nlp("My name is Prashant, I love AI")

[{'entity_group': 'PER',
  'score': 0.9993387,
  'word': 'Prashant',
  'start': 11,
  'end': 19}]

In [8]:
# Sample passage for the NER demo, assembled from adjacent string literals.
# Each fragment keeps its trailing space, so the result is a single line with
# no embedded newlines and the character offsets stay stable.
text = (
    "“We formed our partnership with OpenAI around a shared ambition to "
    "responsibly advance cutting-edge AI research and democratize AI as a new "
    "technology platform,” said Satya Nadella, Chairman and CEO, Microsoft. "
    "“In this next phase of our partnership, developers and organizations across "
    "industries will have access to the best AI infrastructure, models, and toolchain "
    "with Azure to build and run their applications.”"
)

In [9]:
# Run the NER pipeline on the longer passage; the result is displayed as the cell output.
nlp(text)

[{'entity_group': 'ORG',
  'score': 0.99865675,
  'word': 'OpenAI',
  'start': 32,
  'end': 38},
 {'entity_group': 'MISC',
  'score': 0.9881143,
  'word': 'AI',
  'start': 100,
  'end': 102},
 {'entity_group': 'MISC',
  'score': 0.9725788,
  'word': 'AI',
  'start': 128,
  'end': 130},
 {'entity_group': 'PER',
  'score': 0.99965036,
  'word': 'Satya Nadella',
  'start': 167,
  'end': 180},
 {'entity_group': 'ORG',
  'score': 0.9988427,
  'word': 'Microsoft',
  'start': 200,
  'end': 209},
 {'entity_group': 'MISC',
  'score': 0.9932892,
  'word': 'AI',
  'start': 327,
  'end': 329},
 {'entity_group': 'ORG',
  'score': 0.9938997,
  'word': 'Azure',
  'start': 373,
  'end': 378}]

In [10]:
# The 'start' and 'end' values in the pipeline output are character offsets
# into the input string, so slicing `text` with them recovers the entity text
# (here the offsets reported for the last entity above).
text[slice(373, 378)]

'Azure'