# Part 1

## Named entity recognition

In [2]:
import spacy
# Load the "en_core_web_sm" pipeline by name; the model package must already be installed.
# Every later cell uses this `nlp` object to turn raw text into a processed doc.
nlp = spacy.load("en_core_web_sm")

In [8]:
def show_ents(doc):
    """Print one line per named entity in ``doc``.

    Each line holds the entity text (padded to 30 columns), its label
    (padded to 20 columns) and the description returned by
    ``spacy.explain`` for that label. When ``doc.ents`` is empty a single
    "No entities found." line is printed instead.

    Args:
        doc: Object exposing an ``ents`` iterable whose items have
            ``text`` and ``label_`` attributes.

    Returns:
        None. Output goes to stdout.
    """
    # Guard clause: nothing to list, so report that and stop.
    if not doc.ents:
        print("No entities found.")
        return

    for ent in doc.ents:
        print(f"{ent.text:{30}} {ent.label_:{20}} {spacy.explain(ent.label_)}")

In [9]:
# Baseline: a greeting with nothing to tag, so the fallback message is printed.
doc = nlp("Hi how are you!")
show_ents(doc)

No entities found.


In [10]:
# "May" occurs as a modal verb and as a month; in the recorded output only "next May" is tagged (DATE).
doc = nlp("May I go to Washington, DC next May to see the Washington Monument?")
show_ents(doc)

Washington, DC                 GPE                  Countries, cities, states
next May                       DATE                 Absolute or relative dates or periods
the Washington Monument        ORG                  Companies, agencies, institutions, etc.


In [11]:
# A sentence that mixes a monetary amount with a company name.
doc = nlp("Can I please borrow 500 dollars from you to buy some Microsoft stock?")
show_ents(doc)

500 dollars                    MONEY                Monetary values, including unit
Microsoft                      ORG                  Companies, agencies, institutions, etc.


In [12]:
# In the recorded output "Tesla" is not tagged; only "U.K." and "$6 million" are found.
doc = nlp("Tesla to build a U.K. factory for $6 million")
show_ents(doc)

U.K.                           GPE                  Countries, cities, states
$6 million                     MONEY                Monetary values, including unit


## Add a new entity

In [13]:
from spacy.tokens import Span

In [14]:
# Look up the "ORG" label in the vocab's string store; the result is passed as the Span label below.
ORG = doc.vocab.strings[u"ORG"]

In [15]:
# Span over token positions 0 (start) to 1 (end, exclusive) of doc: the single leading token "Tesla", labelled ORG.
new_ent = Span(doc,0,1,label=ORG)

In [19]:
# doc.ents is a tuple, so a new list is assigned rather than appended to in place.
# Entities overlapping new_ent are dropped first: on the first run none overlap,
# and on a re-run this removes the copy added earlier, so the assignment does not
# fail on a conflicting (overlapping) span.
doc.ents = [
    ent for ent in doc.ents
    if ent.end <= new_ent.start or ent.start >= new_ent.end
] + [new_ent]

In [20]:
# Re-list the entities to confirm the manually added "Tesla" ORG span now appears with the model's own.
show_ents(doc)

Tesla                          ORG                  Companies, agencies, institutions, etc.
U.K.                           GPE                  Countries, cities, states
$6 million                     MONEY                Monetary values, including unit


# Part 2

## Add multiple phrases as named entities

In [22]:
# The product is spelled two ways ("vacuum cleaner" / "vacuum-cleaner");
# in the recorded output the model tags neither, only the ordinal "first".
doc = nlp(
    "Our company plans to introduce a new vacuum cleaner. "
    "If successful, the vacuum-cleaner will be our first product."
)
show_ents(doc)

first                          ORDINAL              "first", "second", etc.


In [23]:
from spacy.matcher import PhraseMatcher

In [24]:
# Build the phrase matcher on the pipeline's vocab so that it and the docs it scans share one vocabulary.
matcher = PhraseMatcher(nlp.vocab)

In [25]:
# Both spellings that occur in the text above; each is registered as its own pattern.
phrase_list = ["vacuum cleaner", "vacuum-cleaner"]

In [26]:
# Tokenize each phrase with nlp.make_doc instead of running the whole pipeline.
# The matcher was created with its default settings and compares token text, so
# tagging, parsing and NER on the pattern phrases would be wasted work.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [27]:
# Register all patterns under one match key. The key is an arbitrary label; its
# hash is returned as the first element of every match tuple.
# The patterns are passed as a single list: this is the signature documented
# since spaCy v2.2.2 and the only one documented for v3. The older form
# add(key, None, *docs) is no longer part of the documented API.
matcher.add("newproduct", phrase_patterns)

In [28]:
# Run the matcher over doc; each result is a (match_id, start, end) tuple of token positions.
found_matches = matcher(doc)

In [29]:
# Display the raw match tuples; both share one match_id because a single key was registered.
found_matches

[(2689272359382549672, 7, 9), (2689272359382549672, 14, 17)]

In [30]:
# Look up the "PRODUCT" label in the vocab's string store; the result is passed as the Span label below.
PROD = doc.vocab.strings[u"PRODUCT"]

In [31]:
# Each match is (match_id, start, end): build a PRODUCT span from the token range
# and ignore the id, which only identifies the matcher key.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [33]:
# doc.ents is a tuple, so a new list is assigned rather than extended in place.
# Existing entities that overlap any matched span are dropped first: on the first
# run none overlap, and on a re-run this removes the PRODUCT spans added earlier,
# so the assignment does not fail on conflicting (overlapping) spans.
kept_ents = [
    ent for ent in doc.ents
    if all(ent.end <= new.start or ent.start >= new.end for new in new_ents)
]
doc.ents = kept_ents + new_ents

In [34]:
# Re-list the entities to confirm both product spellings are now reported as PRODUCT.
show_ents(doc)

vacuum cleaner                 PRODUCT              Objects, vehicles, foods, etc. (not services)
vacuum-cleaner                 PRODUCT              Objects, vehicles, foods, etc. (not services)
first                          ORDINAL              "first", "second", etc.


## Count named entities

In [35]:
# A sentence with two monetary amounts, one written with digits and one in words.
doc = nlp("Originally priced at $29.50, the sweater was marked down to five dollars.")
show_ents(doc)

29.50                          MONEY                Monetary values, including unit
five dollars                   MONEY                Monetary values, including unit


In [38]:
# Keep only the entities whose label is MONEY; the cell displays the resulting list.
[ent for ent in doc.ents if ent.label_ == "MONEY"]

[29.50, five dollars]

In [39]:
# Count the MONEY entities by summing one per match, without building an intermediate list.
sum(1 for ent in doc.ents if ent.label_ == "MONEY")

2