# Named Entity Recognition (NER)

In [5]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [6]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [11]:
doc = nlp("Tesla Inc is going to acquire Twitter Inc for $45 billion")
for ent in doc.ents:
    print(ent.text, ent.label_, "|", spacy.explain(ent.label_))


Tesla Inc ORG | Companies, agencies, institutions, etc.
Twitter Inc ORG | Companies, agencies, institutions, etc.
$45 billion MONEY | Monetary values, including unit


In [12]:
from spacy import displacy
displacy.render(doc, style="ent")

# List down all entities

In [13]:
nlp.pipe_labels['ner']

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [16]:
doc = nlp("Michael Bloomberg founded Bloomberg in 1982")
for ent in doc.ents:
    print(ent.text, ent.label, "|", spacy.explain(ent.label_))

Michael Bloomberg 380 | People, including fictional
Bloomberg 380 | People, including fictional
1982 391 | Absolute or relative dates or periods


Michael Bloomberg | PERSON | People, including fictional

Bloomberg | GPE | Countries, cities, states

1982 | DATE | Absolute or relative dates or periods

Above it made a mistake in identifying Bloomberg the company. Let's try hugging face for this now.

https://huggingface.co/dslim/bert-base-NER?text=Michael+Bloomberg+founded+Bloomberg+in+1982

# Setting custom entities

In [17]:
doc = nlp("Tesla is going to acquire Twitter for $45 billion")
for ent in doc.ents:
    print(ent.text, " | ", ent.label_)


Tesla  |  ORG
Twitter  |  PRODUCT
$45 billion  |  MONEY


In [19]:
doc[2:5]

going to acquire

In [20]:
type(doc[2:5])

spacy.tokens.span.Span

In [24]:
from spacy.tokens import Span

s1 = Span(doc, 0, 1, label="ORG")
s2 = Span(doc, 5, 6, label="ORG")

doc.set_ents([s1, s2], default="unmodified")

In [25]:
for ents in doc.ents:
    print(ents.text, " | ", ents.label_)

Tesla  |  ORG
Twitter  |  ORG
$45 billion  |  MONEY
