In [58]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [59]:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text,'--', ent.label_,'--',spacy.explain(ent.label_))
    else:
        print('entity not found!')

In [60]:
doc1 = nlp(u'How are you')

In [61]:
show_ents(doc1)

entity not found!


In [62]:
doc2 = nlp(u'may i go to New York next year, to see the Statue of Liberty? I found Pakistan most beautiful country in the World. My name is Mahar Tariq.')

In [63]:
show_ents(doc2)

New York -- GPE -- Countries, cities, states
next year -- DATE -- Absolute or relative dates or periods
the Statue of Liberty -- FAC -- Buildings, airports, highways, bridges, etc.
Pakistan -- GPE -- Countries, cities, states
Mahar Tariq -- PERSON -- People, including fictional


In [64]:
doc3 = nlp(u'Can i please have 500 dollars for Microsoft Stocks?')

In [65]:
show_ents(doc3)

500 dollars -- MONEY -- Monetary values, including unit
Microsoft -- ORG -- Companies, agencies, institutions, etc.


In [66]:
doc4 = nlp(u'Tesla to build Pakistan factory for $16 Million.')
show_ents(doc4)

Pakistan -- GPE -- Countries, cities, states
$16 Million -- MONEY -- Monetary values, including unit


In [67]:
from spacy.tokens import Span

In [68]:
ORG = doc4.vocab.strings[u'ORG']

In [69]:
new_ent = Span(doc4,0,1,label=ORG)

In [70]:
doc4.ents = list(doc4.ents)+[new_ent]

In [71]:
show_ents(doc4)

Tesla -- ORG -- Companies, agencies, institutions, etc.
Pakistan -- GPE -- Countries, cities, states
$16 Million -- MONEY -- Monetary values, including unit


In [72]:
doc5 = nlp(u'Our company created a brand new vaccum cleaner.'
          u'The new vaccum-cleaner is the best in the show.'
          )

In [73]:
show_ents(doc5)

entity not found!


In [74]:
from spacy.matcher import PhraseMatcher

In [75]:
phrase_matcher = PhraseMatcher(nlp.vocab)

In [76]:
phrase_list = ['vaccum cleaner','vaccum-cleaner']
phrase_patterns = [nlp(text) for text in phrase_list]
phrase_patterns

[vaccum cleaner, vaccum-cleaner]

In [77]:
phrase_matcher.add('newProduct',None,*phrase_patterns)

In [78]:
found_matches = phrase_matcher(doc5)

In [79]:
found_matches

[(4452177204818730156, 6, 8), (4452177204818730156, 11, 14)]

In [80]:
from spacy.tokens import Span

In [81]:
PROD = doc5.vocab.strings[u'PRODUCT']

In [82]:
new_ents = [Span(doc5,match[1],match[2],label=PROD) for match in found_matches]

In [83]:
doc5.ents = list(doc5.ents) + new_ents

In [84]:
show_ents(doc5)

vaccum cleaner -- PRODUCT -- Objects, vehicles, foods, etc. (not services)
vaccum-cleaner -- PRODUCT -- Objects, vehicles, foods, etc. (not services)


In [116]:
doc6 = nlp(u'Originally Amazon paid $29.95 for this toy car to his Mircosoft, but now it is marked down by $10.'
           u'The National Bank of Pakistan has agreed to pay $55.4 million and to provide plans for improving compliance at its New York City branch after U.S.regulators found major deficiencies.')

In [117]:
doc6.ents

(Amazon,
 29.95,
 Mircosoft,
 National Bank of Pakistan,
 $55.4 million,
 New York City)

In [118]:
[ent for ent in doc6.ents if ent.label_=='MONEY']

[29.95, $55.4 million]

In [119]:
from spacy import displacy

In [120]:
displacy.render(doc6,style='ent',jupyter=True)

In [121]:
for sent in doc6.sents:
    displacy.render(doc6,style='ent',jupyter=True)

In [124]:
colors = {'MONEY':'RED','GPE':'YELLOW'}
for sent in doc6.sents:
    displacy.render(doc6,style='ent',jupyter=True,options={'colors':colors})