In [0]:
import spacy

In [0]:
# Load the 'en_core_web_sm' model once; the resulting `nlp` object is reused by
# every cell below to turn text into a processed document.
nlp = spacy.load('en_core_web_sm')

In [0]:
def show_ent(doc):
    """Print the named entities of ``doc``, one per line.

    Each line has the form ``text---label---explanation``, where the
    explanation is whatever ``spacy.explain`` returns for the label,
    converted with ``str``. If ``doc.ents`` is empty, the single line
    "NO entity found" is printed instead.
    """
    entities = doc.ents
    if not entities:
        print("NO entity found")
        return
    for span in entities:
        print('---'.join((span.text, span.label_, str(spacy.explain(span.label_)))))

In [0]:
# A plain greeting, used as a baseline input for the entity printer.
doc = nlp(u'Hi how are you?')

In [5]:
# Nothing in the greeting is tagged, so the fallback message is printed.
show_ent(doc)

NO entity found


In [0]:
# Sentence mentioning a city, a month and a landmark.
# NOTE(review): "Novmeber" and "INDAIA" look like typos; the recorded output of
# the next cell depends on this exact text, so confirm intent before correcting.
doc = nlp(u'I love going to Mumbai in Novmeber to see Gateway of INDAIA!')

In [7]:
# Recorded run: all three entities, including the misspelled "Novmeber",
# are labelled GPE.
show_ent(doc)

Mumbai---GPE---Countries, cities, states
Novmeber---GPE---Countries, cities, states
Gateway of INDAIA---GPE---Countries, cities, states


In [0]:
# Sentence combining a money amount with a company name.
doc = nlp(u"Can i Have 500 dollars of Apple")

In [9]:
# Recorded run: "500 dollars" is tagged MONEY and "Apple" is tagged ORG.
show_ent(doc)

500 dollars---MONEY---Monetary values, including unit
Apple---ORG---Companies, agencies, institutions, etc.


In [0]:
# Example sentence whose first token ("Tesla") is added as an entity by hand
# in the cells that follow.
doc = nlp(u"Tesla to build a factory in India for 5 million.")

In [11]:
# Recorded run: only "India" and "5 million" are reported; "Tesla" is not.
show_ent(doc)

India---GPE---Countries, cities, states
5 million---CARDINAL---Numerals that do not fall under another type


In [0]:
from spacy.tokens import Span

In [0]:
# Look up the integer ID that the vocabulary's string table holds for "ORG";
# it is passed as the Span label below.
ORG = doc.vocab.strings[u"ORG"]

In [14]:
# Show the looked-up ID (383 in the recorded run).
ORG

383

In [0]:
# Build a span over `doc` from token index 0 up to index 1, labelled with the
# ORG ID; the next cell shows that it covers the single token "Tesla".
new_ent = Span(doc,0,1,label=ORG)

In [16]:
# Display the span to confirm which text it covers.
new_ent

Tesla

In [0]:
# Register the hand-built span alongside the entities the model already found.
# An existing entity covering exactly the same tokens is dropped first, so
# re-running this cell replaces it instead of submitting the same span twice
# (spaCy raises an error when entity spans overlap).
doc.ents = [
    ent for ent in doc.ents
    if (ent.start, ent.end) != (new_ent.start, new_ent.end)
] + [new_ent]

In [18]:
# Recorded run: "Tesla" is now listed as ORG, ahead of the model's own entities.
show_ent(doc)

Tesla---ORG---Companies, agencies, institutions, etc.
India---GPE---Countries, cities, states
5 million---CARDINAL---Numerals that do not fall under another type


In [0]:
# Two sentences about a product that the model does not tag on its own.
# The two literals are joined by implicit concatenation, so the first one ends
# with a space to keep the sentences from running together as "cleaner.This".
doc = nlp(u"Our company created a new vacuum cleaner. "
          u"This new vacuum-cleaner is the best")

In [21]:
# Recorded run: no entities are reported for the product text.
show_ent(doc)

NO entity found


In [0]:
from spacy.matcher import PhraseMatcher

In [0]:
# Phrase matcher that shares the pipeline's vocabulary.
matcher =PhraseMatcher(nlp.vocab)

In [0]:
# Surface forms of the product to search for: spaced and hyphenated spellings.
phrase = [
    'vacuum cleaner',
    'vacuum-cleaner',
]

In [0]:
# Turn each phrase into a Doc pattern. The matcher was created without an
# `attr` override, so it compares verbatim token text and only tokenization is
# needed; `nlp.make_doc` tokenizes without running the rest of the pipeline.
phrase_pattern = [nlp.make_doc(text) for text in phrase]

In [26]:
# Display the pattern documents (each one prints as its text).
phrase_pattern

[vacuum cleaner, vacuum-cleaner]

In [0]:
# Register both pattern documents under the key 'newproduct'.
# NOTE(review): the `None` second argument looks like the callback slot of the
# older positional PhraseMatcher.add signature -- confirm against the installed
# spaCy version.
matcher.add('newproduct',None,*phrase_pattern)

In [0]:
# Run the matcher over the product text and keep the list of matches.
found = matcher(doc)

In [30]:
# Each match is a 3-tuple (id, start, end); the recorded run shows token ranges
# 5-7 and 10-13, both carrying the same id.
found

[(2689272359382549672, 5, 7), (2689272359382549672, 10, 13)]

In [0]:
from spacy.tokens import Span

In [0]:
# Integer ID that the vocabulary's string table holds for "PRODUCT"; used as
# the Span label below.
PROD = doc.vocab.strings[u"PRODUCT"]

In [0]:
# Wrap the token range of every match in a Span labelled PRODUCT; the leading
# id of each match tuple is not needed here.
new_ent = [Span(doc, start, end, label=PROD) for _, start, end in found]

In [36]:
# Display the PRODUCT spans built from the matches.
new_ent

[vacuum cleaner, vacuum-cleaner]

In [0]:
# Add the PRODUCT spans to the document's entities.
# Existing entities that cover exactly the same tokens as one of the new spans
# are dropped first, so re-running this cell replaces them instead of
# submitting the same spans twice (spaCy raises an error when entity spans
# overlap).
doc.ents = [
    ent for ent in doc.ents
    if all((ent.start, ent.end) != (span.start, span.end) for span in new_ent)
] + new_ent

In [39]:
# Recorded run: both spellings of the product are now listed as PRODUCT.
show_ent(doc)

vacuum cleaner---PRODUCT---Objects, vehicles, foods, etc. (not services)
vacuum-cleaner---PRODUCT---Objects, vehicles, foods, etc. (not services)


In [0]:
# Text with two dollar amounts, used below to filter entities by label.
# NOTE(review): "Orignal" and "But not the cost" look like typos (perhaps
# "Original" / "But now the cost"); the recorded output depends on this exact
# text, so confirm intent before correcting.
doc = nlp(u"Orignal The cost was $23.33 for fuel. But not the cost is $30.")

In [43]:
# Keep only the entities labelled MONEY; in the recorded run the two spans
# display as the bare amounts, without the "$" sign.
[span for span in doc.ents if span.label_ == "MONEY"]

[23.33, 30]