In [1]:
# Named entity recognition (NER) is the identification of words or phrases in a sentence as named entities,
# e.g. the name of a person, place, organization, etc.
import spacy
import en_core_web_sm

In [2]:
# Load the small English pipeline, then run it over a sample sentence.
sp = en_core_web_sm.load()
sen = sp(
    u'Manchester United is looking to sign Harry Kane for $90 million'
)

In [3]:
# The named entities found in the parsed sentence are available through its `ents` attribute.
print(sen.ents)

(Manchester United, Harry Kane, $90 million)


In [4]:
# For every entity, show its text, its label, and spaCy's explanation of that label.
for entity in sen.ents:
    print(' - '.join((entity.text, entity.label_, str(spacy.explain(entity.label_)))))

Manchester United - PERSON - People, including fictional
Harry Kane - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit


In [5]:
# Adding new entities: start from a sentence and list what the model finds on its own.
sen = sp(
    u'HHHHH is setting up a new company in India'
)
for entity in sen.ents:
    print(' - '.join((entity.text, entity.label_, str(spacy.explain(entity.label_)))))

India - GPE - Countries, cities, states


In [6]:
# Add "HHHHH" as an entity of type "ORG" to the document.
from spacy.tokens import Span

# Look up the ID that the vocabulary's string store holds for the "ORG" label.
ORG = sen.vocab.strings[u'ORG']
# Span over token 0 up to (not including) token 1, i.e. the first token "HHHHH".
new_entity = Span(sen, 0, 1, label=ORG)
# Doc.ents rejects overlapping spans, so drop any existing entity that overlaps
# the new span before appending it. This keeps the cell safe to re-run on the
# same doc instead of failing on the second execution.
sen.ents = [
    ent for ent in sen.ents
    if ent.end <= new_entity.start or ent.start >= new_entity.end
] + [new_entity]

In [7]:
# List the document's entities again: text, label, and the label's explanation.
for entity in sen.ents:
    print(' - '.join((entity.text, entity.label_, str(spacy.explain(entity.label_)))))

HHHHH - ORG - Companies, agencies, institutions, etc.
India - GPE - Countries, cities, states


In [8]:
# Counting entities: parse a two-sentence text and list every entity found in it.
sen = sp(
    u'Manchester United is looking to sign Harry Kane for $90 million. David demand 100 Million Dollars'
)
for entity in sen.ents:
    print(' - '.join((entity.text, entity.label_, str(spacy.explain(entity.label_)))))

Manchester United - PERSON - People, including fictional
Harry Kane - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit
David - PERSON - People, including fictional
100 Million Dollars - MONEY - Monetary values, including unit


In [9]:
# Number of entities in the document whose label is ORG.
sum(1 for ent in sen.ents if ent.label_ == 'ORG')

0

In [10]:
#Visualizing Named Entities
from spacy import displacy
sen = sp(u'Manchester United is looking to sign Harry Kane for $90 million. David demand 100 Million Dollars')
displacy.render(sen, style='ent', jupyter=True)

In [None]:
# Restrict the visualization to entities labelled ORG. The options dict is
# named `ent_options` rather than `filter` so that Python's built-in filter()
# is not shadowed for the rest of the kernel session.
ent_options = {'ents': ['ORG']}
displacy.render(sen, style='ent', jupyter=True, options=ent_options)