In [3]:
import spacy

In [6]:
nlp = spacy.load ("en_core_web_sm")

In [7]:
def show_ents (doc):
    """Print each entity in ``doc.ents`` as ``text (label) - explanation``.

    The explanation comes from ``spacy.explain`` applied to the entity label.
    When ``doc.ents`` is empty, a single "No Entities found" line is printed.
    """
    entities = doc.ents
    if not entities:
        print ("No Entities found")
        return

    for span in entities:
        label = span.label_
        print (f"{span.text} ({label}) - {spacy.explain (label)}")

In [8]:
show_ents (nlp("How are you?"))

No Entities found


In [9]:
show_ents (nlp("NewDelhi is the capital of India."))

NewDelhi (ORG) - Companies, agencies, institutions, etc.
India (GPE) - Countries, cities, states


In [10]:
show_ents (nlp("I need 500 dollars to pay my school fees"))

500 dollars (MONEY) - Monetary values, including unit


In [11]:
show_ents (nlp("I need 5000 rupees to pay my school fees"))

5000 (CARDINAL) - Numerals that do not fall under another type


In [12]:
### Adding your own entity to the document's named entities

In [13]:
from spacy.tokens import Span

In [14]:
doc = nlp("Tesla is buying an Indian Startup for 6 million dollars.")

In [15]:
ORG = doc.vocab.strings['ORG']

In [16]:
ORG

383

In [17]:
# Build a Span over token 0 only ("Tesla"; the end index 1 is exclusive) and label it ORG.
new_entity = Span(doc, 0, 1, label=ORG)

In [18]:
doc.ents

(Indian, 6 million dollars)

In [19]:
# doc.ents rejects overlapping spans, so re-running this cell after "Tesla" has
# been registered would raise. Append the span only when an equal one is not
# already among the document's entities, which keeps the cell idempotent.
if new_entity not in doc.ents:
    doc.ents = list(doc.ents) + [new_entity]

In [20]:
doc.ents

(Tesla, Indian, 6 million dollars)

In [21]:
show_ents (doc)

Tesla (ORG) - Companies, agencies, institutions, etc.
Indian (NORP) - Nationalities or religious or political groups
6 million dollars (MONEY) - Monetary values, including unit


In [22]:
# Adding multiple entities to the document's named entities

In [23]:
doc = nlp ("My company has developed a vaccum cleaner as vaccum-cleaner is the best in business right now.")

In [24]:
show_ents (doc)

No Entities found


In [25]:
from spacy.matcher import PhraseMatcher

In [27]:
phrases_list = ['vaccum cleaner', 'vaccum-cleaner']

In [28]:
from spacy.tokens import Span

In [29]:
# The PhraseMatcher below is created with its default attribute, so the patterns
# only need to be tokenized. nlp.make_doc does just that and skips the rest of
# the pipeline that a full nlp(...) call would run on every phrase.
phrases = [nlp.make_doc(phrase) for phrase in phrases_list]

In [30]:
phrases

[vaccum cleaner, vaccum-cleaner]

In [33]:
matcher = PhraseMatcher (doc.vocab)

In [34]:
type(matcher)

spacy.matcher.phrasematcher.PhraseMatcher

In [37]:
# Pass the patterns as one list. The older form with a positional `None`
# callback followed by `*patterns` was removed in spaCy v3; the list form is
# accepted by spaCy v3 and by v2.2.2 and later.
matcher.add ('new_product', phrases)

In [38]:
matcher(doc)

[(9676102616875934564, 5, 7), (9676102616875934564, 8, 11)]

In [39]:
found_matches = matcher(doc)

In [40]:
# Each match is a (match_id, start, end) triple; wrap the matched token range
# in an unlabelled Span.
new_entities = [
    Span(doc, start, end)
    for _match_id, start, end in found_matches
]

In [41]:
new_entities

[vaccum cleaner, vaccum-cleaner]

In [49]:
PROD = doc.vocab.strings['PRODUCT']

In [50]:
PROD

386

In [52]:
# Rebuild the spans from the same (match_id, start, end) triples, this time
# tagging each one with the PROD label id.
new_entities = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [57]:
# doc.ents rejects overlapping spans, so re-running this cell after the matches
# have been registered would raise. Append only the spans that are not already
# entities, which keeps the cell idempotent.
current_ents = list(doc.ents)
doc.ents = current_ents + [span for span in new_entities if span not in current_ents]

In [58]:
show_ents (doc)

vaccum cleaner (PRODUCT) - Objects, vehicles, foods, etc. (not services)
vaccum-cleaner (PRODUCT) - Objects, vehicles, foods, etc. (not services)
