# NER
NER (Named Entity Recognition) seeks to locate and classify named-entity mentions in unstructured text into pre-defined categories such as person names,
organisations, locations, medical codes, time expressions, quantities, monetary values, and percentages.

In [1]:
import spacy
# Load the 'en_core_web_sm' pipeline once; every cell below reuses this `nlp` object.
# NOTE(review): the model package must be installed separately — confirm for your environment.
nlp = spacy.load('en_core_web_sm')

In [2]:
def show_ent(doc):
    """Print one line per named entity in ``doc``: text, label and label description.

    Args:
        doc: A processed spaCy ``Doc`` (any object exposing ``.ents`` whose
            items have ``.text`` and ``.label_`` works).

    Prints ``"no entity found"`` when ``doc.ents`` is empty.
    """
    if doc.ents:
        for ent in doc.ents:
            # spacy.explain() returns None for labels without a glossary entry
            # (e.g. custom labels); formatting None with a width spec raises
            # TypeError, so fall back to a placeholder string.
            description = spacy.explain(ent.label_) or "(no description)"
            print(f"{ent.text:10} {ent.label_:10} {description:20}")
    else:
        print("no entity found")

In [13]:
# Baseline sentence: per the recorded output of show_ent(doc1), no entities are found in it.
doc1 = nlp("Hi how are you")

spacy.lang.en.English

In [4]:
# Exercises the empty branch of show_ent (prints "no entity found").
show_ent(doc1)

no entity found


In [5]:
# The labels printed here are model predictions, not ground truth: in the recorded
# output "Apple" is tagged ORG and "Newton" is tagged GPE.
doc2 = nlp("Apple fells on Newton's head")
show_ent(doc2)

Apple      ORG        Companies, agencies, institutions, etc.
Newton     GPE        Countries, cities, states


In [6]:
from spacy.tokens import Span

doc3 = nlp("Apple is sweet")
show_ent(doc3)

print("*******************************************")
# Resolve the label string to its ID in the vocab's string store.
PRODUCT = doc3.vocab.strings[u'PRODUCT']
# Token range [0, 1) covers the first token, "Apple".
new_ent = Span(doc3, 0, 1, label=PRODUCT)

# A token can belong to only one entity. The model already tags "Apple"
# (ORG in the recorded output), and newer spaCy releases raise ValueError when
# overlapping spans are assigned to doc.ents, so drop any predicted entity
# that overlaps the manual span before adding it.
non_overlapping = [
    ent for ent in doc3.ents
    if ent.end <= new_ent.start or ent.start >= new_ent.end
]
doc3.ents = non_overlapping + [new_ent]

show_ent(doc3)

Apple      ORG        Companies, agencies, institutions, etc.
*******************************************
Apple      PRODUCT    Objects, vehicles, foods, etc. (not services)


In [7]:
# Example of the model's limits: the recorded output lists only "Avenger"
# (tagged GPE); "Hulk" is not detected as an entity.
doc4 = nlp("Hulk was freeking awsome in Avenger movie")
show_ent(doc4)



Avenger    GPE        Countries, cities, states


In [14]:
# In the recorded output the model tags "U.K." and "$6 billion" but not "Tesla";
# the following cells add "Tesla" as an ORG entity by hand.
doc5 = nlp(u"Tesla to build a U.K. factory for $6 billion")
show_ent(doc5)
print("*******************************************")


U.K.       GPE        Countries, cities, states
$6 billion MONEY      Monetary values, including unit
*******************************************


In [33]:
# Resolve the 'ORG' label string to its ID in the vocab's string store; used as the Span label below.
ORG = doc5.vocab.strings[u'ORG']


In [37]:
# Token range [0, 1) of doc5, i.e. its first token ("Tesla"), labelled ORG.
# Requires `Span` to be imported already (done in an earlier cell).
new_ent = Span(doc5, 0,1, label=ORG)

In [38]:
# Keep the cell re-runnable: a token can belong to only one entity, so a plain
# append would hand spaCy a duplicate "Tesla" span on a second run (newer
# releases reject that with ValueError). Drop anything overlapping new_ent first.
doc5.ents = [
    ent for ent in doc5.ents
    if ent.end <= new_ent.start or ent.start >= new_ent.end
] + [new_ent]
show_ent(doc5)

Tesla      ORG        Companies, agencies, institutions, etc.
U.K.       GPE        Countries, cities, states
$6 billion MONEY      Monetary values, including unit


In [125]:
# Adjacent string literals are concatenated implicitly, so the first one must
# end with a space; without it the text reads "...vacuum cleaner.This vacuum-cleaner...".
doc = nlp(u"our company created a brabd new vacuum cleaner. "
           "This vacuum-cleaner is the best in show")
show_ent(doc)

no entity found


In [147]:
from  spacy.matcher import PhraseMatcher

In [148]:
# Phrase matcher bound to the pipeline's vocab; patterns are registered in a later cell.
matcher = PhraseMatcher(nlp.vocab)

In [149]:
# Surface forms to tag as products: the spaced and the hyphenated spelling.
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']

In [153]:
# PhraseMatcher patterns only need to be tokenized: nlp.make_doc() runs just the
# tokenizer, skipping the tagger/parser/NER that a full nlp() call would run.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [155]:
# Register every pattern Doc under the key 'newproduct'.
# NOTE(review): this is the older call form (None in the callback slot, patterns
# unpacked as positional args); newer spaCy releases take the patterns as a single
# list — confirm against the installed version.
matcher.add('newproduct', None, *phrase_patterns)

In [156]:
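# Alternative (disabled): token-based Matcher instead of PhraseMatcher.
# A pattern is a list with one dict per token; the optional punctuation token
# covers the hyphen in "vacuum-cleaner".
# from spacy.matcher import Matcher
# matcher = Matcher(nlp.vocab)
# pattern = [{'LOWER': 'vacuum'}, {'IS_PUNCT': True, 'OP': '?'}, {'LOWER': 'cleaner'}]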

In [157]:
# Run the matcher over `doc`; the result is a list of (match_id, start, end) tuples,
# where start/end are token indices into `doc`.
found_matcher = matcher(doc)

In [158]:
# Display the raw matches (match_id, start token, end token).
found_matcher

[(2689272359382549672, 6, 8), (2689272359382549672, 10, 13)]

In [159]:
from spacy.tokens import Span

In [160]:
# Resolve the 'PRODUCT' label string to its ID in the vocab's string store; used as the Span label below.
PROD = doc.vocab.strings[u'PRODUCT']

In [161]:
# Turn each (match_id, start, end) hit into a PRODUCT-labelled Span over `doc`;
# the match id itself is not needed here.
new_ents = [Span(doc, start, end, label=PROD) for _, start, end in found_matcher]
new_ents

[vacuum cleaner, vacuum-cleaner]

In [162]:
# Keep the cell re-runnable: a token can belong to only one entity, so a plain
# append would hand spaCy duplicate spans on a second run (newer releases reject
# that with ValueError). Drop existing entities that overlap any new span first.
doc.ents = [
    ent for ent in doc.ents
    if not any(ent.start < new.end and new.start < ent.end for new in new_ents)
] + new_ents

In [163]:
# The manually added PRODUCT spans now show up as entities of `doc`.
show_ent(doc)

vacuum cleaner PRODUCT    Objects, vehicles, foods, etc. (not services)
vacuum-cleaner PRODUCT    Objects, vehicles, foods, etc. (not services)


In [168]:
# Plain loop: a list comprehension used only for its print() side effect builds
# a throwaway list of None values that the notebook then echoes as the cell result.
for ent in doc.ents:
    print(ent)

vacuum cleaner
vacuum-cleaner


[None, None]

In [169]:
from spacy import displacy

In [189]:
# Sample sentence for the displaCy visualisation. This rebinds `doc`, replacing
# the vacuum-cleaner Doc used in the cells above.
doc = nlp(u"TATA manufacture NANO in Gujarat" )
# Alternative sample sentence (disabled):
# doc = nlp(u"Dandia Night was great , awsome singers" )

In [190]:
# Render the entities of `doc` inline: style='ent' selects the entity view and
# jupyter=True requests notebook rendering.
displacy.render(doc, style='ent', jupyter=True)