In [1]:
# Load the 'en_core_web_sm' pipeline once; the later cells all reuse `nlp`.
import spacy
nlp = spacy.load('en_core_web_sm')

In [67]:
def show_ents(doc):
    """Print every named entity in ``doc`` as ``text - label - explanation``.

    Prints "No entities found" instead when ``doc.ents`` is empty.
    """
    if not doc.ents:
        print("No entities found")
        return
    for entity in doc.ents:
        # str() keeps the line printable even if explain() yields None.
        description = str(spacy.explain(entity.label_))
        print(' - '.join((entity.text, entity.label_, description)))
    

In [4]:
# A sentence for which the recorded run finds no entities.
doc = nlp("Hi how are you??")

In [5]:
# Exercises the empty case: the recorded run prints "No entities found".
show_ents(doc)

No entities found


In [10]:
# "May" occurs twice in this sentence: first as a verb, then as a month.
doc = nlp("May I go to washington, DC next May to see the Washington Monumnet?")

In [11]:
# Recorded run: only "next May" is tagged DATE; "the Washington Monumnet"
# comes back as GPE, like "washington" and "DC".
show_ents(doc)

washington - GPE - Countries, cities, states
DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monumnet - GPE - Countries, cities, states


In [12]:
# A sentence that mixes a money amount with a company name.
doc = nlp("Can I please have a 500 dollars of Microsoft stock?")

In [13]:
# Recorded run: "500 dollars" -> MONEY and "Microsoft" -> ORG.
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [14]:
# "Tesla" is the first word of this sentence.
doc = nlp("Tesla to build a U.K. factory for $6 million")

In [15]:
# Recorded run: only "U.K." and "$6 million" are tagged; "Tesla" is not.
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [16]:
from spacy.tokens import Span

In [17]:
# Look up the integer ID that the vocab's string store holds for "ORG".
ORG = doc.vocab.strings["ORG"]

In [18]:
# Display the looked-up ID (383 in the recorded run).
ORG

383

In [19]:
# Token range [0, 1) is the first token only; it is labelled with the ORG ID.
new_ent = Span(doc, 0, 1, label=ORG)

In [20]:
# Rebuild the entity list with the existing entities followed by the new span.
doc.ents = [*doc.ents, new_ent]

In [21]:
# Recorded run: doc.ents displays as a tuple with "Tesla" in first position.
doc.ents

(Tesla, U.K., $6 million)

In [22]:
# Recorded run: "Tesla" is now listed as ORG ahead of the two original entities.
show_ents(doc)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


## Part 2: Adding phrase matches as named entities

In [54]:
# The two adjacent literals are concatenated into a single two-sentence text.
doc = nlp(
    "Our company created a brand new vaccum cleaner. "
    "This new vaccum-cleaner is the best in show."
)

In [55]:
# Build a PhraseMatcher on the pipeline's vocab, with no extra options.
from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)

In [56]:
# The two spellings of the product name that should be matched.
phrase_list = ['vaccum cleaner','vaccum-cleaner']
# Phrase patterns only need to be tokenized, so use make_doc rather than
# running every pipeline component on each phrase via nlp(text).
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [57]:
# Each pattern displays as its text in the recorded run.
phrase_patterns

[vaccum cleaner, vaccum-cleaner]

In [58]:
# Register both patterns under the single key 'newproduct' (no callback).
matcher.add('newproduct', phrase_patterns)

In [59]:
# Each result is a 3-tuple; elements 1 and 2 are used further down as the
# start/end token indices of a Span.
found_matches = matcher(doc)
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [60]:
from spacy.tokens import Span
# Look up the integer ID that the vocab's string store holds for "PRODUCT".
PROD = doc.vocab.strings["PRODUCT"]

In [62]:
# Turn every match into a PRODUCT span; the match ID itself is not needed.
new_ents = [Span(doc, start, end, label=PROD) for _, start, end in found_matches]

In [63]:
# Recorded run: one span per match, "vaccum cleaner" and "vaccum-cleaner".
new_ents

[vaccum cleaner, vaccum-cleaner]

In [64]:
# Rebuild the entity list with the existing entities followed by the new spans.
doc.ents = [*doc.ents, *new_ents]

In [65]:
# Recorded run: both matched phrases are printed as PRODUCT entities.
show_ents(doc)

vaccum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vaccum-cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)


In [70]:
# Two money amounts written differently: "$29.95" and "10 dollars".
doc = nlp("Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars.")


In [71]:
# Recorded run: the first MONEY entity is "29.95" (without the "$"), the
# second is "10 dollars".
show_ents(doc)

29.95 - MONEY - Monetary values, including unit
10 dollars - MONEY - Monetary values, including unit


In [74]:
# Keep only the entities whose label is MONEY.
[span for span in doc.ents if span.label_ == 'MONEY']

[29.95, 10 dollars]

In [None]:
# Shell escapes: stage NER_1.ipynb, commit it with a timestamp-style message,
# and push to the 'main' branch of 'origin'.
!git add NER_1.ipynb
!git commit -m '16:23/24-01-2022'
!git push origin main