## Import Libraries

In [1]:
import warnings
# Silence every warning for the rest of the session. The filter is installed
# before spaCy is imported, so warnings raised during that import are hidden too.
# NOTE(review): a blanket 'ignore' also hides deprecation notices — consider
# narrowing it to specific categories.
warnings.filterwarnings('ignore')
import spacy
# Load the English pipeline once; the cells below all reuse this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [3]:
doc = nlp(u'I had visited my home town last Diwali!')

In [4]:
print(doc.text)

I had visited my home town last Diwali!


In [6]:
print(doc[2].tag_)

VBN


In [8]:
print(doc[2].pos_)

VERB


In [11]:
# One row per token: text, coarse POS and fine-grained tag (each padded to
# 10 columns), followed by spaCy's explanation of the tag.
# The trailing "" argument reproduces the space that ends every row.
for token in doc:
    print(f"{token.text:10}", f"{token.pos_:10}", f"{token.tag_:10}", spacy.explain(token.tag_), "")

I          PRON       PRP        pronoun, personal 
had        AUX        VBD        verb, past tense 
visited    VERB       VBN        verb, past participle 
my         DET        PRP$       pronoun, possessive 
home       NOUN       NN         noun, singular or mass 
town       NOUN       NN         noun, singular or mass 
last       ADJ        JJ         adjective 
Diwali     PROPN      NNP        noun, proper singular 
!          PUNCT      .          punctuation mark, sentence closer 


In [25]:
doc1 = nlp(u'I read book on Machine Learning.')

In [26]:
doc1[1].text

'read'

In [27]:
# Same token table as above, this time for doc1: text, POS and tag are
# left-justified to 10 columns, then the tag's explanation is appended.
# The trailing "" argument reproduces the space that ends every row.
for token in doc1:
    print(token.text.ljust(10), token.pos_.ljust(10), token.tag_.ljust(10), spacy.explain(token.tag_), "")

I          PRON       PRP        pronoun, personal 
read       VERB       VBP        verb, non-3rd person singular present 
book       NOUN       NN         noun, singular or mass 
on         ADP        IN         conjunction, subordinating or preposition 
Machine    PROPN      NNP        noun, proper singular 
Learning   PROPN      NNP        noun, proper singular 
.          PUNCT      .          punctuation mark, sentence closer 


In [30]:
doc2 = nlp(u'I read a articles om Machine Learning')

In [31]:
# Token table for doc2: text, POS and tag in 10-column fields, then the
# explanation of the tag (each row ends with a single trailing space).
for token in doc2:
    print("{:10} {:10} {:10} {} ".format(token.text, token.pos_, token.tag_, spacy.explain(token.tag_)))

I          PRON       PRP        pronoun, personal 
read       VERB       VBD        verb, past tense 
a          DET        DT         determiner 
articles   NOUN       NNS        noun, plural 
om         VERB       VBZ        verb, 3rd person singular present 
Machine    NOUN       NN         noun, singular or mass 
Learning   PROPN      NNP        noun, proper singular 


In [39]:
# Tally how many tokens of doc2 carry each coarse part-of-speech value.
# The resulting mapping is keyed by integer POS IDs, not by tag names.
pos_counts = doc2.count_by(spacy.attrs.POS)

In [33]:
doc2.vocab[100].text

'VERB'

In [43]:
doc2[2].pos

90

In [52]:
# List the POS IDs in ascending order, each with its name (looked up in the
# vocab and padded to 5 columns) and the number of tokens that carry it.
for k in sorted(pos_counts):
    v = pos_counts[k]
    print(f"{k}. {doc2.vocab[k].text:5} {v}")

90. DET   1
92. NOUN  2
95. PRON  1
96. PROPN 1
100. VERB  2


### Named Entity Recognition (NER)

In [13]:
def show_ents(doc):
    """Print every named entity of ``doc`` with its label and description.

    One line is written per entity, in the form
    ``<text> -- <label> - <description>``, where the description is whatever
    ``spacy.explain`` returns for the label. If ``doc`` has no entities, the
    single line ``Entities not found`` is printed instead.
    """
    if not doc.ents:
        print("Entities not found")
        return
    for entity in doc.ents:
        description = spacy.explain(entity.label_)
        print(f"{entity.text} -- {entity.label_} - {description}")

In [7]:
doc = nlp(u'Hey, How are you!')

In [8]:
show_ents(doc)

Entities not found


In [9]:
doc = nlp(u'Shall I go to Bangalore next march to see the Board of Cricket?')

In [12]:
show_ents(doc)

Bangalore -- GPE - Countries, cities, states
next march -- DATE - Absolute or relative dates or periods
the Board of Cricket -- ORG - Companies, agencies, institutions, etc.


In [16]:
doc = nlp(u' Can I have 2000 rupees for Infosys stock?')

In [17]:
show_ents(doc)

2000 -- CARDINAL - Numerals that do not fall under another type
Infosys -- ORG - Companies, agencies, institutions, etc.


In [18]:
doc = nlp(u'TCS to buld India factory for $2 million')

In [19]:
show_ents(doc)

India -- GPE - Countries, cities, states
$2 million -- MONEY - Monetary values, including unit


In [20]:
from spacy.tokens import Span

In [21]:
# Look up the integer ID under which the vocab's string store keeps the
# label 'ORG'; the Span created further down receives this ID as its label.
ORG = doc.vocab.strings[u'ORG']

In [22]:
ORG

383

In [24]:
# Build a span covering token 0 only (the end index 1 is exclusive) and label
# it ORG. In the current doc that first token is 'TCS'.
new_ent = Span(doc, 0,1,label=ORG)

In [25]:
# Copy the existing entities into a list, append the hand-made span and assign
# the result back to the doc.
# NOTE(review): re-running this cell on the same doc would add the span a
# second time — presumably spaCy rejects duplicate/overlapping entities;
# re-create `doc` before re-running.
doc.ents = list(doc.ents) + [new_ent]

In [26]:
show_ents(doc)

TCS -- ORG - Companies, agencies, institutions, etc.
India -- GPE - Countries, cities, states
$2 million -- MONEY - Monetary values, including unit


In [19]:
# The two sentences are joined by implicit string-literal concatenation, so the
# first literal must end with a space; without it the text reads "purifier.This".
doc = nlp(u"Our company created a brand new air purifier. "
          "This air-purifier is best in market!")

In [14]:
show_ents(doc)

Entities not found


In [15]:
# Import PhraseMatcher and create one that shares the pipeline's vocabulary.
from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)

In [16]:
# Surface forms of the product name to look for, with and without the hyphen.
phrase_list = [
    'air purifier',
    'air-purifier',
]
# Run each phrase through the pipeline so it becomes a Doc that can be handed
# to the matcher as a pattern.
phrase_pattern = list(map(nlp, phrase_list))

In [17]:
# Register the phrase Docs under the match key 'product'.
# NOTE(review): the `None` looks like the on_match callback slot of the older
# add(key, on_match, *patterns) signature — newer spaCy releases take
# add(key, [patterns]) instead; confirm against the installed version.
matcher.add('product', None,*phrase_pattern)

In [20]:
matcher(doc)

[(2104994216896503478, 6, 8), (2104994216896503478, 10, 13)]

In [30]:
found_matches = matcher(doc)

In [22]:
from spacy.tokens import Span

In [24]:
# Look up the integer ID under which the vocab's string store keeps the
# label 'PRODUCT'; the matched spans below are labelled with it.
PROD = doc.vocab.strings[u'PRODUCT']

In [31]:
# Each match is a (match_id, start, end) triple; wrap its token range in a
# span labelled PRODUCT.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [32]:
# Merge the matcher-derived spans into the entities already on the doc,
# then list the combined result.
doc.ents = [*doc.ents, *new_ents]
show_ents(doc)

air purifier -- PRODUCT - Objects, vehicles, foods, etc. (not services)
air-purifier -- PRODUCT - Objects, vehicles, foods, etc. (not services)


In [37]:
doc = nlp(u'I had brought a toy for $10.90 in Amazon but now price had decreased by $5')

In [38]:
# Keep only the entities whose label is MONEY.
list(filter(lambda ent: ent.label_ == "MONEY", doc.ents))

[10.90, 5]

In [39]:
# visualize the NER
from spacy import displacy

In [67]:
# The two sentences are joined by implicit string-literal concatenation, so the
# first literal must end with a space; without it the rendered text reads
# "million.BY contrast".
doc = nlp(u"Over the last decade Amazon had sold nealy 50 million fast food and grocery worth $100 million. "
         u"BY contrast Alibaba sold 10 million products that is worth of $30 million.")

In [68]:
displacy.render(doc,style='ent', jupyter=True)

In [70]:
# Render the entity view one sentence at a time. Each sentence's text is run
# through the pipeline again, so its entities are predicted from that sentence
# alone rather than taken from the full doc.
for sent in doc.sents:
    displacy.render(nlp(sent.text),style='ent', jupyter=True)

In [85]:
# Custom displaCy styling: ORG entities get a red-to-orange gradient background.
# The value is used as a CSS colour, so it must name a valid CSS function
# ("linear-gradient"); the previous spelling "linera-gradient" was not valid CSS.
colors = {'ORG': 'linear-gradient(90deg, red, orange)'}
# Restrict highlighting to MONEY and ORG entities and apply the colours above.
options = {'ents':['MONEY','ORG'], 'colors': colors}

In [84]:
displacy.render(doc,style='ent', jupyter=True, options=options)