In [4]:
import spacy
# Load the small English pipeline and run it over a sample sentence.
nlp = spacy.load('en_core_web_sm')
doc = nlp(u'A quick brown fox jumps over a lazy dog')

# One row per token: text, coarse POS, fine-grained tag, and the tag's explanation.
for token in doc:
    fine_tag = token.tag_
    print(f"{token.text} \t {token.pos_} \t {fine_tag} \t {spacy.explain(fine_tag)}")

A 	 DET 	 DT 	 determiner
quick 	 ADJ 	 JJ 	 adjective (English), other noun-modifier (Chinese)
brown 	 ADJ 	 JJ 	 adjective (English), other noun-modifier (Chinese)
fox 	 NOUN 	 NN 	 noun, singular or mass
jumps 	 VERB 	 VBZ 	 verb, 3rd person singular present
over 	 ADP 	 IN 	 conjunction, subordinating or preposition
a 	 DET 	 DT 	 determiner
lazy 	 ADJ 	 JJ 	 adjective (English), other noun-modifier (Chinese)
dog 	 NOUN 	 NN 	 noun, singular or mass


In [9]:
# POS count:
# Doc.count_by returns a plain dict mapping each POS attribute ID (an integer)
# to the number of tokens in the doc that carry it.
pos_count = doc.count_by(spacy.attrs.POS)

# The keys are numeric IDs; doc.vocab[pos_id].text resolves an ID back to its
# POS name. Sorting the items orders the rows by ID.
for pos_id, n_tokens in sorted(pos_count.items()):
    print(f'{pos_id} {doc.vocab[pos_id].text:5} {n_tokens} ')

84 ADJ   3 
85 ADP   1 
90 DET   2 
92 NOUN  2 
100 VERB  1 


In [10]:
# Tag count: same idea as the POS count, but keyed by the fine-grained tag.
tag_count = doc.count_by(spacy.attrs.TAG)
for tag_id in sorted(tag_count):
    # Resolve the numeric key back to the tag's text before printing.
    tag_name = doc.vocab[tag_id].text
    print(f"{tag_id} {tag_name:5} {tag_count[tag_id]}")

1292078113972184607 IN    1
10554686591937588953 JJ    3
13927759927860985106 VBZ   1
15267657372422890137 DT    2
15308085513773655218 NN    2


In [12]:
# Visualization of POS: render the doc with displaCy's 'dep' style.
from spacy import displacy

dep_options = {"distance": 100}
displacy.render(doc, style='dep', options=dep_options)

In [14]:
# Named Entity Recognition:
# doc2.ents holds the entities found in the document; print each one with
# its label and the explanation of that label.
doc2 = nlp(u'Jim bought 300 shares of Acme Corp. in 2006')
for entity in doc2.ents:
    label = entity.label_
    print(f"{entity.text} {label} {spacy.explain(label)}")

Jim PERSON People, including fictional
300 CARDINAL Numerals that do not fall under another type
Acme Corp. ORG Companies, agencies, institutions, etc.
2006 DATE Absolute or relative dates or periods


In [15]:
# Highlight the named entities of doc2 inline. The "distance" option only
# applies to the dependency ('dep') visualizer, so it is not passed here.
displacy.render(doc2, style='ent')

In [None]:
# If NER fails to find a named entity, you can add one of your own:
# from spacy.tokens import Span
# ORG= doc.vocab.strings[u'ORG']
# new_entity = Span(doc, start, end, label=ORG)
# doc.ents= list(doc.ents)+[new_entity]

In [None]:
#to add multiple words as named entities
# doc= nlp(u'Our company created a brand new vacuum cleaner. '
#         u'This new vacuum-cleaner is the best in show.'
#         )

# We'll use phrase matching to add multi-word phrases (here "vacuum cleaner") as named entities.
# from spacy.matcher import PhraseMatcher
# phrase_matcher = PhraseMatcher(nlp.vocab)
# phrase_list=['vacuum cleaner', 'vacuum-cleaner']
# phrase_patterns= [nlp(text) for text in phrase_list]
# phrase_matcher.add('Vac', phrase_patterns)
# found_matches = phrase_matcher(doc)

# from spacy.tokens import Span
# PROD= doc.vocab.strings[u'PRODUCT']
# new_ents= [Span(doc, match[1], match[2], label=PROD) for match in found_matches]
# doc.ents= list(doc.ents)+new_ents

In [17]:
# Number of entities in doc2; doc2.ents can be measured directly, without
# first copying it into a list.
len(doc2.ents)

4