## Named Entity Recognition (NER)

In [1]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [4]:
def show_ents(doc):
    """Print the named entities of ``doc``, one per line.

    Each line has the form ``<text> - <label> - <explanation>``, where the
    explanation is the string form of ``spacy.explain(label)``. When
    ``doc.ents`` is empty, a single 'No Entities Found' line is printed.
    """
    # Guard clause: nothing to list.
    if not doc.ents:
        print('No Entities Found')
        return

    for entity in doc.ents:
        # str() keeps the line printable even if explain() has no description.
        description = str(spacy.explain(entity.label_))
        print(' - '.join((entity.text, entity.label_, description)))

In [5]:
doc = nlp(u"Hi How are You?")

In [6]:
show_ents(doc)

No Entities Found


In [7]:
doc = nlp(u'May I go to Washington, DC next May to see the Washington Monument?')

In [8]:
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [9]:
doc = nlp(u'Can I please borrow 500 dollars from you to buy some Microsoft stock?')

In [10]:
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


## Adding Tesla as an Entity with the ORG Label

In [11]:
doc = nlp(u'Tesla to build a U.K. factory for $6 million')

In [12]:
show_ents(doc) # We want Tesla to be an entity in ORG

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [20]:
from spacy.tokens import Span

In [21]:
# Look up the value the vocab's string store holds for the "ORG" label
ORG = doc.vocab.strings[u"ORG"]

In [22]:
print(ORG)

383


In [25]:
# Span over tokens [0, 1) of the doc, i.e. the first token ("Tesla"), labelled ORG
new_ent = Span(doc,0,1,label=ORG) # ORG label assigned to Tesla

In [26]:
doc_ents = list(doc.ents) + [new_ent]

In [27]:
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [29]:
list(doc_ents)

[U.K., $6 million, Tesla]

## Adding Named Entities to All Matching Spans

In [32]:
# The two literals are joined by implicit string concatenation, so the first
# one carries a trailing space to keep the sentences separated
# ("cleaner. This" rather than "cleaner.This").
doc = nlp(u'Our company created a brand new vacuum cleaner. '
          u'This new vacuum-cleaner is the best in show.')

In [33]:
show_ents(doc)

No Entities Found


In [34]:
from spacy.matcher import PhraseMatcher

In [35]:
matcher = PhraseMatcher(nlp.vocab)

In [36]:
# Phrases that should be recognised as a product
phrase_list = ['vacuum cleaner','vacuum-cleaner']
# Run each phrase through nlp so every pattern is a processed object, not a raw string
phrase_patterns = list(map(nlp, phrase_list))

In [37]:
# Register the phrase patterns on the existing matcher under the name 'newproduct'.
# NOTE(review): the None argument is presumably the on_match callback slot of the
# spaCy 2.x signature — confirm against the installed spaCy version.
matcher.add('newproduct',None,*phrase_patterns)

In [38]:
found_matches = matcher(doc)

In [39]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [44]:
from spacy.tokens import Span

# Look up the value the vocab's string store holds for the "PRODUCT" label
PROD = doc.vocab.strings[u"PRODUCT"]

# One PRODUCT span per match; each match is a (match_id, start, end) triple
new_ents = [Span(doc, start, end, label=PROD) for _, start, end in found_matches]

In [45]:
# Write the existing entities plus the new PRODUCT spans back onto the doc
doc.ents  = list(doc.ents) + new_ents

In [46]:
doc.ents

(vacuum cleaner, vacuum-cleaner)

In [51]:
doc.ents[0].label_

'PRODUCT'

In [52]:
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


## Counting Entities

In [53]:
doc = nlp(u"Originally i paid $29.95 for this car toy , but now its is marked down by 10 dollars.")

In [55]:
# How many MONEY-type entities are there in the doc?

[ent for ent in doc.ents if ent.label_ == 'MONEY']

[29.95, 10 dollars]

In [56]:
# Count the MONEY entities without building an intermediate list
sum(1 for entity in doc.ents if entity.label_ == 'MONEY')

2