In [1]:
import spacy
# Load the small English pipeline once; every later cell reuses `nlp`.
MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)

In [2]:
# Parse the example sentence into a Doc of tokens.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [3]:
# Show the full text of the parsed sentence.
sentence_text = doc.text
print(sentence_text)

The quick brown fox jumped over the lazy dog's back.


In [4]:
# Index the Doc to get a single token, then show its text.
fifth_token = doc[4]
print(fifth_token.text)

jumped


In [5]:
# Printing the token object itself displays the same text as `.text`.
print(str(doc[4]))

jumped


## Coarse-grained (`pos_`) and fine-grained (`tag_`) tags

In [6]:
# Coarse-grained part-of-speech label of the token.
coarse_pos = doc[4].pos_
print(coarse_pos)

VERB


In [7]:
# Fine-grained tag of the same token.
fine_tag = doc[4].tag_
print(fine_tag)

VBD


___
## Fine-grained Part-of-speech Tags
In addition to its coarse-grained POS tag, each token is given a fine-grained tag (`token.tag_`) determined by its morphology, for example:
<table>
<tr><th>POS</th><th>Description</th><th>Fine-grained Tag</th><th>Description</th><th>Morphology</th></tr>
<tr><td>VERB</td><td>verb</td><td>VBD</td><td>verb, past tense</td><td>VerbForm=fin Tense=past</td></tr>
</table>

In [8]:
# One row per token: text, coarse POS description, fine-grained tag description.
for token in doc:
    coarse_desc = spacy.explain(token.pos_)
    fine_desc = spacy.explain(token.tag_)
    print(f"{token.text:10} {coarse_desc:20} {fine_desc:20}")

The        determiner           determiner          
quick      adjective            adjective           
brown      adjective            adjective           
fox        noun                 noun, singular or mass
jumped     verb                 verb, past tense    
over       adposition           conjunction, subordinating or preposition
the        determiner           determiner          
lazy       adjective            adjective           
dog        noun                 noun, singular or mass
's         particle             possessive ending   
back       noun                 noun, singular or mass
.          punctuation          punctuation mark, sentence closer


In [9]:
# Present-progressive example sentence.
doc = nlp("I am reading a book on NLP.")
token = doc[1]  # "am" (the auxiliary); the participle "reading" is doc[2]
print(f"{token.text:10} {spacy.explain(token.pos_):20} {spacy.explain(token.tag_):20}")

am         auxiliary            verb, non-3rd person singular present


In [10]:
# Same sentence with the bare verb "read"; inspect how it is tagged.
doc = nlp("I read a book on NLP.")
token = doc[1]  # read
pos_desc = spacy.explain(token.pos_)
tag_desc = spacy.explain(token.tag_)
print(f"{token.text:10} {pos_desc:20} {tag_desc:20}")

read       verb                 verb, past tense    


## Parts of speech count

In [11]:
# Re-parse the first example sentence; `doc` was reassigned by the previous cells.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [12]:
# Map each coarse-grained POS id to the number of tokens carrying it.
pos_attr = spacy.attrs.POS
POS_count = doc.count_by(pos_attr)
POS_count

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [13]:
# Decode a POS id from the counts: 84 appears 3 times, i.e. 3 adjectives in the sentence.
adj_label = doc.vocab[84].text
spacy.explain(adj_label)

'adjective'

In [14]:
# Without the trailing underscore, `.pos` is the integer id of the token's POS.
third_token = doc[2]
third_token.pos

84

In [15]:
# Tabulate the POS counts: id, label, description, frequency (ordered by id).
for pos_id, freq in sorted(POS_count.items()):
    pos_label = doc.vocab[pos_id].text
    print(f"{pos_id:5}. \t {pos_label:10} {spacy.explain(pos_label):20} {freq}")

   84. 	 ADJ        adjective            3
   85. 	 ADP        adposition           1
   90. 	 DET        determiner           2
   92. 	 NOUN       noun                 3
   94. 	 PART       particle             1
   97. 	 PUNCT      punctuation          1
  100. 	 VERB       verb                 1


In [16]:
# Count fine-grained tags and tabulate them: id, tag, description, frequency.
TAG_count = doc.count_by(spacy.attrs.TAG)
for tag_id, freq in sorted(TAG_count.items()):
    tag_label = doc.vocab[tag_id].text
    tag_desc = spacy.explain(tag_label)
    print(f"{tag_id:20}. \t {tag_label:10} {tag_desc:50} {freq}")

                  74. 	 POS        possessive ending                                  1
 1292078113972184607. 	 IN         conjunction, subordinating or preposition          1
10554686591937588953. 	 JJ         adjective                                          3
12646065887601541794. 	 .          punctuation mark, sentence closer                  1
15267657372422890137. 	 DT         determiner                                         2
15308085513773655218. 	 NN         noun, singular or mass                             3
17109001835818727656. 	 VBD        verb, past tense                                   1


In [17]:
# Count syntactic dependency labels and tabulate them: id, label, description, frequency.
DEP_count = doc.count_by(spacy.attrs.DEP)
for dep_id, freq in sorted(DEP_count.items()):
    dep_label = doc.vocab[dep_id].text
    # spacy.explain() returns None for labels it has no glossary entry for;
    # fall back to the raw label explicitly instead of catching the resulting
    # formatting error with a bare `except`, which would also hide real bugs.
    dep_desc = spacy.explain(dep_label) or dep_label
    print(f"{dep_id:20}. \t {dep_label:10} {dep_desc:50} {freq}")

                 402. 	 amod       adjectival modifier                                3
                 415. 	 det        determiner                                         2
                 429. 	 nsubj      nominal subject                                    1
                 439. 	 pobj       object of preposition                              1
                 440. 	 poss       possession modifier                                1
                 443. 	 prep       prepositional modifier                             1
                 445. 	 punct      punctuation                                        1
 8110129090154140942. 	 case       case marking                                       1
 8206900633647566924. 	 ROOT       ROOT                                               1
