# Parts of Speech Using spaCy

In [1]:
import spacy
# Load spaCy's small English pipeline once; later cells call `nlp` to parse text.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Parse a sample sentence into a Doc so its tokens can be inspected.
doc = nlp('The quick brown fox jumped over lazy dog\'s back')

In [3]:
# One line per token: text, coarse POS, fine-grained tag, and the tag's description.
for token in doc:
    print(f'{token.text} {token.pos_} {token.tag_} {spacy.explain(token.tag_)}')

The DET DT determiner
quick ADJ JJ adjective
brown ADJ JJ adjective
fox NOUN NN noun, singular or mass
jumped VERB VBD verb, past tense
over ADP IN conjunction, subordinating or preposition
lazy ADJ JJ adjective
dog NOUN NN noun, singular or mass
's PART POS possessive ending
back NOUN NN noun, singular or mass


In [4]:
# Same listing as a table: fixed column widths keep the fields aligned.
for token in doc:
    print(f'{token.text:10} {token.pos_:8} {token.tag_:6} {spacy.explain(token.tag_)}')

The        DET      DT     determiner
quick      ADJ      JJ     adjective
brown      ADJ      JJ     adjective
fox        NOUN     NN     noun, singular or mass
jumped     VERB     VBD    verb, past tense
over       ADP      IN     conjunction, subordinating or preposition
lazy       ADJ      JJ     adjective
dog        NOUN     NN     noun, singular or mass
's         PART     POS    possessive ending
back       NOUN     NN     noun, singular or mass


In [5]:
# "read" is spelled the same in present and past tense, so the tagger has to pick one.
doc = nlp('I read books on NLP')
r = doc[1]  # second token: "read"

In [6]:
r

read

In [7]:
# Text, coarse POS, fine-grained tag, and tag description for the selected token.
print(f'{r.text:6} {r.pos_:8} {r.tag_:8} {spacy.explain(r.tag_)}')

read   VERB     VBD      verb, past tense


In [8]:
# A second sentence using the "-ing" form of the same verb.
doc2 = nlp('I am reading a book on NLP')
r = doc2[2]  # third token: "reading"

In [9]:
r

reading

In [10]:
r.tag_

'VBG'

In [11]:
spacy.explain('VBG')

'verb, gerund or present participle'

In [12]:
r.pos_

'VERB'

In [14]:
# Without the trailing underscore the attribute is the integer ID, not the string label.
r.pos

100

## Counting POS Tags
The `Doc.count_by()` method accepts a specific token attribute as its argument and returns a frequency count of that attribute as a dictionary object. Keys in the dictionary are the integer IDs of the attribute's values, and the dictionary values are their frequencies. Counts of zero are not included.

In [15]:
# Parse the sentence whose POS and tag frequencies are counted in the following cells.
doc = nlp('The quick brown fox jumped over the lazy dog\'s back')

In [16]:
# Frequency of each coarse POS value in the Doc, keyed by the value's integer ID.
pos_counts = doc.count_by(spacy.attrs.POS)
pos_counts

{84: 3, 85: 1, 90: 2, 92: 3, 94: 1, 100: 1}

In [17]:
# Resolve an integer ID from the counts back to its string label through the vocab.
doc.vocab[84].text

'ADJ'

In [18]:
doc.vocab[85].text

'ADP'

In [19]:
spacy.explain('ADP')

'adposition'

In [20]:
doc.vocab[100].text

'VERB'

In [22]:
# List the POS counts in ascending ID order: integer ID, string label, frequency.
for k, v in sorted(pos_counts.items()):
    print(f'{k} {doc.vocab[k].text:10} : {v}')

84 ADJ        : 3
85 ADP        : 1
90 DET        : 2
92 NOUN       : 3
94 PART       : 1
100 VERB       : 1


In [23]:
# Frequency of each fine-grained tag (TAG attribute) in the Doc, keyed by integer ID.
tag_counts = doc.count_by(spacy.attrs.TAG)

In [24]:
tag_counts

{74: 1,
 1292078113972184607: 1,
 10554686591937588953: 3,
 15267657372422890137: 2,
 15308085513773655218: 3,
 17109001835818727656: 1}

In [31]:
# List the tag counts in ascending ID order: integer ID, string label, frequency.
for k, v in sorted(tag_counts.items()):
    print(f'{k} {doc.vocab[k].text:10} : {v}')

74 POS        : 1
1292078113972184607 IN         : 1
10554686591937588953 JJ         : 3
15267657372422890137 DT         : 2
15308085513773655218 NN         : 3
17109001835818727656 VBD        : 1


In [36]:
spacy.explain('VBD')

'verb, past tense'

In [None]:
#Thank you!!