In [2]:
import spacy;

In [39]:
import pandas as pd;
import numpy as np;

In [3]:
# Load the 'en_core_web_sm' pipeline once; every nlp(...) call below reuses it.
nlp = spacy.load("en_core_web_sm")

In [4]:
# Run the sample sentence through the pipeline to get a parsed document.
doc = nlp("the quick brown fox jumped over the lazy dog's back.")

In [5]:
# Show the raw text held by the parsed document.
print(doc.text)

the quick brown fox jumped over the lazy dog's back.


In [10]:
# Indexing the document yields a single token; index 4 is "jumped".
print(doc[4])

jumped


In [13]:
# Coarse-grained part-of-speech tag of the token at index 4.
print(doc[4].pos_)

VERB


In [14]:
# Fine-grained tag of the same token (index 4).
print(doc[4].tag_)

VBD


In [17]:
# One row per token: text, coarse POS, fine-grained tag, and spaCy's
# human-readable explanation of the fine-grained tag.
for token in doc:
    description = spacy.explain(token.tag_)
    print(f"{token.text:10} {token.pos_:10} {token.tag_:10} {description}")

the        DET        DT         determiner
quick      ADJ        JJ         adjective
brown      ADJ        JJ         adjective
fox        NOUN       NN         noun, singular or mass
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [20]:
# First "read" example: parse the sentence and display the token at index 1.
doc = nlp("I read books on NLP.")
doc[1]

read

In [21]:
# Print text, coarse POS, fine-grained tag and tag explanation for "read".
token = doc[1]
description = spacy.explain(token.tag_)
print(f"{token.text:10} {token.pos_:10} {token.tag_:10} {description}")

read       VERB       VBP        verb, non-3rd person singular present


In [22]:
# Second "read" example: same verb, different object; display the token at index 1.
doc = nlp("I read a book on NLP.")
doc[1]

read

In [24]:
# The tagger picks up context cues and adjusts the POS tag accordingly:
# print the same fields for "read" in this second sentence to compare.
token = doc[1]
description = spacy.explain(token.tag_)
print(f"{token.text:10} {token.pos_:10} {token.tag_:10} {description}")

read       VERB       VBD        verb, past tense


In [25]:
# Re-parse the fox sentence: `doc` was reassigned by the "read" examples.
doc = nlp("the quick brown fox jumped over the lazy dog's back.")

In [26]:
# Tally how many tokens carry each coarse-grained POS tag.
# Keys are numeric POS ids, values are token counts.
POS_counts = doc.count_by(spacy.attrs.POS)

In [28]:
# count_by returned a plain dictionary of part-of-speech counts; display it.
POS_counts
# Output format: <POS_code> : <POS_count>

{96: 1, 83: 3, 99: 1, 84: 1, 89: 2, 91: 3, 93: 1}

In [33]:
# Translate each numeric POS id back to its tag name and report its count.
# POS_counts is a plain dict, so values are read by iterating its items
# (the earlier call syntax POS_counts(obj) raised
# "TypeError: 'dict' object is not callable").
for pos_id, count in POS_counts.items():
    # doc.vocab[<id>].text resolves the numeric id to its string form.
    print("POS tag: ", doc.vocab[pos_id].text, "\nPOS count: ", count, "\n")

In [45]:
# List every coarse-grained POS tag with its frequency, ordered by POS id.
for pos_id, count in sorted(POS_counts.items()):
    pos_name = doc.vocab[pos_id].text
    print(f"{pos_id}. {pos_name:5} {count}")

83. ADJ   3
84. ADP   1
89. DET   2
91. NOUN  3
93. PART  1
96. PUNCT 1
99. VERB  1


In [46]:
# Tally tokens per fine-grained tag (keys are numeric tag ids).
TAG_counts = doc.count_by(spacy.attrs.TAG)

# List every fine-grained tag with its frequency, ordered by tag id.
for tag_id, count in sorted(TAG_counts.items()):
    tag_name = doc.vocab[tag_id].text
    print(f"{tag_id}. {tag_name:5} {count}")

74. POS   1
1292078113972184607. IN    1
10554686591937588953. JJ    3
12646065887601541794. .     1
15267657372422890137. DT    2
15308085513773655218. NN    3
17109001835818727656. VBD   1


In [47]:
# Tally tokens per syntactic dependency label (keys are numeric label ids).
DEP_counts = doc.count_by(spacy.attrs.DEP)

# List every dependency label with its frequency, ordered by label id.
for dep_id, count in sorted(DEP_counts.items()):
    dep_name = doc.vocab[dep_id].text
    print(f"{dep_id}. {dep_name:5} {count}")

399. amod  3
412. det   2
426. nsubj 1
436. pobj  1
437. poss  1
440. prep  1
442. punct 1
8110129090154140942. case  1
8206900633647566924. ROOT  1


## Visualizing POS

In [48]:
from spacy import displacy;

In [49]:
# Draw the dependency parse of `doc` inline in the notebook.
displacy.render(
    doc,
    style="dep",
    jupyter=True,
);

In [57]:
# Options dictionary passed to displacy's dependency visualizer.
#
# Monospace font candidates for 'font':
#   Andalé Mono (monospace)
#   Courier (monospace)
#   Lucida (monospace)
#   Monaco (monospace)
options = {
    'distance': 110,
    # Must be a real boolean: the string 'True' only worked because any
    # non-empty string is truthy (so 'False' would have enabled it too).
    'compact': True,
    'color': 'yellow',
    'bg': "#EE0000",
    'font': 'Monaco',
}

In [54]:
# Same dependency rendering, now styled by the custom `options` dictionary.
displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options=options,
);

In [55]:
# Two-sentence sample, parsed so it can be visualized sentence by sentence.
doc_2 = nlp(
    "This is a sentence. "
    "This is another sentence, definitely longer than the first one."
)

In [56]:
# Materialize the document's sentence iterator into a list of spans.
spans = [sentence for sentence in doc_2.sents]

In [59]:
# Serve the per-sentence dependency parses over HTTP.
# To view them, open 127.0.0.1:<port_served_on> in a browser.
displacy.serve(
    spans,
    style="dep",
    options=options,
)


    Serving on port 5000...
    Using the 'dep' visualizer


    Shutting down server on port 5000.

