In [1]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text + ' - ' + ent.label_ + ' - ' + str(spacy.explain(ent.label_)))
    else:
        print('No entities found')

In [3]:
doc = nlp(u'Hi how are you?')

In [4]:
show_ents(doc)

No entities found


In [7]:
doc2 = nlp(u"May I go to Washington, DC next May to see the Washington Monument?")

In [8]:
show_ents(doc2)

Washington - GPE - Countries, cities, states
DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [10]:
doc3 = nlp(u"Can i please have 500 dollars of Microsoft stock?")

In [11]:
show_ents(doc3)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [12]:
doc4 = nlp(u"Tesla to build a U.K factory for $6 million")

In [13]:
show_ents(doc4)

U.K - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [14]:
from spacy.tokens import Span

In [15]:
ORG = doc.vocab.strings[u"ORG"]

In [16]:
ORG

383

In [19]:
new_ent = Span(doc4, 0, 1, label=ORG)

In [20]:
doc4.ents = list(doc4.ents) + [new_ent]

In [21]:
show_ents(doc4)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [31]:
doc5 = nlp(u"Our company created a brand new vaccum cleaner"
          u"This new vaccum-cleaner is the best in show")

In [33]:
show_ents(doc5)

No entities found


In [24]:
from spacy.matcher import PhraseMatcher

In [34]:
matcher = PhraseMatcher(nlp.vocab)

In [56]:
phrase_list = ['vaccum cleaner', 'vaccum-cleaner']

In [57]:
phrase_patterns = [nlp(text) for text in phrase_list]

In [58]:
matcher.add('newproduct',phrase_patterns)

In [59]:
found_matches = matcher(doc5)

In [60]:
found_matches

[(2689272359382549672, 9, 12)]

In [61]:
from spacy.tokens import Span

In [65]:
PROD = doc5.vocab.strings[u"PRODUCT"]

In [66]:
new_ents = [ Span(doc5,match[1], match[2] , label=PROD) for match in found_matches]

In [67]:
doc5.ents = list(doc5.ents) + new_ents 

In [68]:
show_ents(doc5)

vaccum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [74]:
doc6 = nlp(u"Now $5, later $10")

In [76]:
len([ent for ent in doc6.ents if ent.label_ == 'MONEY'])

2