## Part-of-Speech (POS) Basics

In [1]:
import spacy

In [2]:
# Load the `en_core_web_lg` pipeline; `nlp` is the callable that builds every Doc below.
nlp = spacy.load("en_core_web_lg")

In [9]:
# Run the pipeline over the sample sentence; the result is the Doc inspected below.
doc = nlp("The quick brown fox jumped over the lazy dog.")

In [10]:
# Echo the text held by the Doc.
print(doc.text)

The quick brown fox jumped over the lazy dog.


In [11]:
# Part-of-speech label (`pos_`) of the token at index 4, i.e. "jumped".
print(doc[4].pos_)

VERB


In [12]:
# One row per token: text, pos_, tag_ (each padded to 10 columns),
# followed by spaCy's human-readable explanation of the tag.
for token in doc:
    print(
        token.text.ljust(10),
        token.pos_.ljust(10),
        token.tag_.ljust(10),
        spacy.explain(token.tag_),
    )

The        DET        DT         determiner
quick      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
brown      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
fox        PROPN      NNP        noun, proper singular
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective (English), other noun-modifier (Chinese)
dog        NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [28]:
# New sentence whose second token is "reads"; the next cells inspect that token's tag.
doc = nlp("I reads books on NLP.")

In [29]:
# Second token of the current Doc (index 1).
word = doc[1]

In [30]:
# Display the selected token's text as the cell result.
word.text

'reads'

In [31]:
# Single-row version of the token table: text, pos_, tag_, tag explanation.
token = word
print(
    token.text.ljust(10),
    token.pos_.ljust(10),
    token.tag_.ljust(10),
    spacy.explain(token.tag_),
)

reads      VERB       VBZ        verb, 3rd person singular present


In [32]:
# Variant sentence using "read" instead of "reads", to compare the resulting tag.
doc = nlp("I read a books on NLP.")

In [33]:
# Take the second token of the new Doc and print its text, pos_, tag_ and
# the explanation of the tag in 10-column fields.
token = word = doc[1]
print(
    token.text.ljust(10),
    token.pos_.ljust(10),
    token.tag_.ljust(10),
    spacy.explain(token.tag_),
)

read       VERB       VBP        verb, non-3rd person singular present


In [37]:
# Sentence used for the frequency counts in the following cells.
doc = nlp("The quick brown fox jumped over the lazy dog back.")

In [38]:
# Count tokens per POS value; the result maps integer attribute IDs to counts.
POS_counts = doc.count_by(spacy.attrs.POS)

In [39]:
# Raw mapping: keys are integer IDs, not readable labels.
print(POS_counts)

{90: 2, 84: 3, 96: 1, 100: 1, 85: 1, 92: 1, 86: 1, 97: 1}


In [41]:
# Resolve an integer ID from POS_counts back to its label via the vocab
# (84 is the key that appeared with a count of 3).
doc.vocab[84].text

'ADJ'

In [42]:
# Third token of the current Doc (index 2).
doc[2]

brown

In [45]:
# POS tally ordered by ID: numeric ID (10 wide), label (5 wide), count (5 wide).
for k, v in sorted(POS_counts.items()):
    print(f"{k:10} {doc.vocab[k].text:5} {v:5}")

        84 ADJ       3
        85 ADP       1
        86 ADV       1
        90 DET       2
        92 NOUN      1
        96 PROPN     1
        97 PUNCT     1
       100 VERB      1


In [46]:
# Tally tokens by their TAG attribute; keys are integer IDs, values are counts.
Tag_counts = doc.count_by(spacy.attrs.TAG)

# TAG keys are large hash values (18-20 digits in practice, 20 at most for a
# 64-bit integer), so the ID column is 20 wide. The 10-wide field that suits
# the small POS IDs overflows here and leaves the label/count columns ragged.
for k, v in sorted(Tag_counts.items()):
    print(f"{k:20} {doc.vocab[k].text:5} {v:5}")

164681854541413346 RB        1
1292078113972184607 IN        1
10554686591937588953 JJ        3
12646065887601541794 .         1
15267657372422890137 DT        2
15308085513773655218 NN        1
15794550382381185553 NNP       1
17109001835818727656 VBD       1


In [47]:
# Number of entries reported by the vocabulary attached to this Doc.
len(doc.vocab)

802