In [71]:
import spacy
# Load the pipeline once; every later cell reuses this `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [72]:
def show_ents(doc):
    """Print one aligned row per named entity found in ``doc``.

    Each row holds the entity text (padded to 20 columns), its label
    (padded to 15) and the ``spacy.explain`` description of that label
    (padded to 10). Values longer than their column are printed in full,
    so long entities push the following columns to the right.

    Prints "No entities found" when ``doc.ents`` is empty.
    """
    entities = doc.ents
    if not entities:
        print("No entities found")
        return

    for span in entities:
        # str() keeps the row printable even when explain() has no
        # description for the label and returns None.
        description = str(spacy.explain(span.label_))
        print(f"{span.text:20} {span.label_:15} {description:10}")

In [73]:
# A sentence with no named entities, to exercise the empty branch of show_ents.
doc = nlp("Hi how are you!")

In [74]:
show_ents(doc)

No entities found


In [75]:
# "May" appears twice here: once as a modal verb and once as a month.
doc = nlp("May I go to Washington, DC next May to see the Washington Monument?")

In [76]:
show_ents(doc)

Washington, DC       GPE             Countries, cities, states
next May             DATE            Absolute or relative dates or periods
the Washington Monument ORG             Companies, agencies, institutions, etc.


In [77]:
doc = nlp("Can I please borrow 500 dollars from you to buy some Microsoft stock?")

In [78]:
show_ents(doc)

500 dollars          MONEY           Monetary values, including unit
Microsoft            ORG             Companies, agencies, institutions, etc.


In [79]:
doc = nlp("Peter parker is the best spiderman the Marvel got.")

In [80]:
show_ents(doc)

Peter parker         PERSON          People, including fictional
Marvel               ORG             Companies, agencies, institutions, etc.


In [81]:
# Longer passage; the literal is split across lines for readability only
# (adjacent string literals are concatenated, so the text is unchanged).
doc = nlp(
    " The team, featuring NBA stars such as LeBron James, Stephen Curry, "
    "Kevin Durant, and Joel Embiid, was nicknamed the Avengers due to its "
    "high-profile roster. LeBron James was named the tournament's Most "
    "Valuable Player (MVP), averaging 14.2 points, 6.8 rebounds, and 8.5 "
    "assists per game. Stephen Curry led the team in scoring, averaging "
    "14.8 points per game, and delivered a standout performance in the "
    "final, scoring 24 points exclusively from 3-pointers."
)

In [82]:
show_ents(doc)

NBA                  ORG             Companies, agencies, institutions, etc.
LeBron James         PERSON          People, including fictional
Stephen Curry        PERSON          People, including fictional
Kevin Durant         PERSON          People, including fictional
Joel Embiid          PERSON          People, including fictional
Avengers             ORG             Companies, agencies, institutions, etc.
LeBron James         PERSON          People, including fictional
Most Valuable Player ORG             Companies, agencies, institutions, etc.
MVP                  ORG             Companies, agencies, institutions, etc.
14.2                 CARDINAL        Numerals that do not fall under another type
6.8                  CARDINAL        Numerals that do not fall under another type
8.5                  CARDINAL        Numerals that do not fall under another type
Stephen Curry        PERSON          People, including fictional
14.8                 CARDINAL        Numerals that do no

In [83]:
from spacy.tokens import Span
# Span lets us manually mark a token range as an entity, with a label of our choice, when the model missed it

In [84]:
# Integer ID that the vocab's string store holds for the "ORG" label.
ORG = doc.vocab.strings["ORG"]

In [85]:
ORG

383

In [86]:
doc = nlp("Tesla to build a U.K factory for $6 million")

In [87]:
show_ents(doc)

U.K                  ORG             Companies, agencies, institutions, etc.
$6 million           MONEY           Monetary values, including unit


In [88]:
# Token range [0, 1) is the first token of the sentence, labelled as ORG.
new_ents = Span(doc, 0, 1, label=ORG)

In [None]:
# doc entities = existing doc entities + the new entity.
# The guard makes this cell safe to re-run: assigning a span whose token
# range is already in doc.ents makes spaCy raise ValueError (conflicting /
# overlapping entity spans), so the span is appended only when it is not
# registered yet.
if (new_ents.start, new_ents.end) not in {(ent.start, ent.end) for ent in doc.ents}:
    doc.ents = list(doc.ents) + [new_ents]

In [90]:
show_ents(doc)

Tesla                ORG             Companies, agencies, institutions, etc.
U.K                  ORG             Companies, agencies, institutions, etc.
$6 million           MONEY           Monetary values, including unit


In [91]:
# Both spellings of the product ("vacuum cleaner" / "vacuum-cleaner") occur here.
doc = nlp(
    "Our company plans to introduce a new vacuum cleaner. "
    "If successful, the vacuum-cleaner will be our first product."
)
show_ents(doc)

first                ORDINAL         "first", "second", etc.


In [92]:
from spacy.matcher import PhraseMatcher
# The matcher must share the pipeline's vocab.
matcher = PhraseMatcher(nlp.vocab)

In [93]:
# Surface forms that should all be tagged as the same product.
phrase_list = ["vacuum cleaner", "vacuum-cleaner"]

In [94]:
# Turn each phrase into a pattern Doc. The PhraseMatcher here uses its
# default attribute (verbatim token text), so only tokenization is needed:
# nlp.make_doc skips the tagger/parser/NER that nlp(text) would run.
phrase_matcher = [nlp.make_doc(text) for text in phrase_list]

In [95]:
# Register the patterns under the key 'newproduct'. Uses the current
# signature add(key, docs): patterns are passed as one list; the legacy
# add(key, None, *docs) form with a positional callback slot is deprecated.
# NOTE(review): the list form needs spaCy >= 2.2.2 — confirm the environment.
matcher.add('newproduct', phrase_matcher)

In [96]:
# Run the phrase matcher over the document.
found_matches = matcher(doc)

In [97]:
print(found_matches)

[(2689272359382549672, 7, 9), (2689272359382549672, 14, 17)]


In [98]:
from spacy.tokens import Span

In [100]:
# Integer ID that the vocab's string store holds for the "PRODUCT" label.
PROD = doc.vocab.strings["PRODUCT"]

In [None]:
found_matches # list of 3-tuples; items [1] and [2] are used below as the start/end token offsets of each Span

[(2689272359382549672, 7, 9), (2689272359382549672, 14, 17)]

In [103]:
# One PRODUCT span per match; each match is a (match_id, start, end) tuple.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [104]:
new_ents

[vacuum cleaner, vacuum-cleaner]

In [105]:
# Append the new PRODUCT spans to the document's entities.
# Spans whose token range is already registered are skipped so the cell can
# be re-run safely: assigning a duplicate span makes spaCy raise ValueError
# (conflicting / overlapping entity spans).
_known_ranges = {(ent.start, ent.end) for ent in doc.ents}
doc.ents = list(doc.ents) + [
    span for span in new_ents if (span.start, span.end) not in _known_ranges
]

In [106]:
doc.ents

(vacuum cleaner, vacuum-cleaner, first)

In [107]:
show_ents(doc)

vacuum cleaner       PRODUCT         Objects, vehicles, foods, etc. (not services)
vacuum-cleaner       PRODUCT         Objects, vehicles, foods, etc. (not services)
first                ORDINAL         "first", "second", etc.


In [108]:
# Two monetary amounts: one written with digits, one spelled out.
doc = nlp("Originally priced at $29.50, the sweater was marked down to five dollars.")

In [111]:
# Count the entities labelled MONEY without building an intermediate list.
sum(1 for ent in doc.ents if ent.label_ == "MONEY")

2