In [1]:
import spacy

In [2]:
# Load the spaCy pipeline package named 'en_core_web_sm'; the resulting `nlp`
# object is what every later cell uses to turn text into a Doc.
nlp = spacy.load('en_core_web_sm')

In [3]:
def show_ents(doc):
    """Print one line per named entity found in ``doc``.

    Each line has the form ``<text> - <label> - <explanation>``, where the
    explanation is whatever ``spacy.explain`` returns for the label,
    converted with ``str``. If ``doc.ents`` is empty, a single
    'No entities found.' line is printed instead.

    Args:
        doc: An object exposing ``ents``, an iterable of entities that each
            provide ``text`` and ``label_`` string attributes.

    Returns:
        None. The report is written to standard output.
    """
    # Guard clause: nothing to list, so report that and stop.
    if not doc.ents:
        print('No entities found.')
        return

    for entity in doc.ents:
        fields = (
            entity.text,
            entity.label_,
            str(spacy.explain(entity.label_)),
        )
        print(' - '.join(fields))

In [6]:
# Sample text that mentions the product in both spellings ("vacuum cleaner"
# and "vacuum-cleaner"). Python joins adjacent string literals with nothing in
# between, so the first literal ends with a space to keep the two sentences
# separated ("cleaner. This" rather than "cleaner.This").
doc = nlp(u'Our company created a brand new vacuum cleaner. '
          u'This new vacuum-cleaner is the best in show.')

In [7]:
# Baseline: list the entities the loaded pipeline assigns by itself, before
# any custom spans are added (the recorded run printed that none were found).
show_ents(doc)

No entities found.


In [8]:
from spacy.matcher import PhraseMatcher

In [9]:
# Create a PhraseMatcher that shares the vocabulary of the loaded pipeline.
matcher = PhraseMatcher(nlp.vocab)

In [10]:
# Surface forms to look for: the spaced and the hyphenated spelling.
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']

In [11]:
# Turn each phrase into a Doc to use as a matcher pattern. The matcher above
# was created with its default settings, so only tokenization is needed:
# nlp.make_doc() tokenizes without running the rest of the pipeline, whereas
# calling nlp(text) would run every pipeline component for each phrase.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [12]:
# Register every pattern under the 'newproduct' key. The patterns are passed
# as one list: the older call form add(key, None, *patterns) is deprecated and
# is rejected by spaCy v3.
# NOTE(review): the list form requires spaCy >= 2.2.2 — confirm the version
# this notebook is pinned to.
matcher.add('newproduct', phrase_patterns)

In [13]:
# Run the phrase matcher over `doc`; the results are converted into entity
# spans further down.
found_matches = matcher(doc)

In [14]:
from spacy.tokens import Span

In [15]:
# Look up the string-store entry for 'PRODUCT' in the document's vocab; the
# value is used as the label of the spans created below.
PROD = doc.vocab.strings[u'PRODUCT']

In [16]:
# Build one PRODUCT-labelled Span per match. Each match is a 3-item tuple;
# the first item is not needed here, the other two are the span boundaries.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _, start, end in found_matches
]

In [17]:
# Append the matched spans to the document's entities.
# Assigning overlapping spans to doc.ents raises an error, which is what
# happened when this cell was run a second time (the spans were already
# present) or when a matched phrase overlapped an existing entity. Spans whose
# tokens are already covered are therefore skipped, which makes the cell safe
# to re-run. Existing entities take precedence over new ones.
merged_ents = list(doc.ents)
occupied = {i for ent in merged_ents for i in range(ent.start, ent.end)}
for span in new_ents:
    covered = set(range(span.start, span.end))
    if occupied.isdisjoint(covered):
        merged_ents.append(span)
        occupied |= covered
doc.ents = merged_ents

In [18]:
# List the entities again now that the matched spans have been assigned to
# doc.ents.
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [19]:
# New example sentence for filtering entities by label.
# NOTE: this rebinds `doc`; the cells above that used `doc` were working on
# the previous text, so re-running them after this cell gives different
# results.
doc = nlp(u'Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars.')

In [20]:
# Keep only the entities whose label is 'MONEY'.
[span for span in doc.ents if span.label_ == 'MONEY']

[29.95, 10 dollars]

In [21]:
# Count the 'MONEY' entities without building an intermediate list.
sum(1 for span in doc.ents if span.label_ == 'MONEY')

2