# Parts of Speech Basics

In [1]:
# Perform standard imports
import spacy 
nlp = spacy.load('en_core_web_sm')

In [2]:
# Create a simple doc object
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

# View token tags

In [3]:
# Print the full text 
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [4]:
# Print the fifty words and associated tags:
print(doc[4].text, doc[4].pos_, doc[4].tag_, spacy.explain(doc[4].tag_))

jumped VERB VBD verb, past tense


In [5]:
for token in doc:
    print(f'{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}')

The        DET      DT     determiner
quick      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
brown      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
fox        NOUN     NN     noun, singular or mass
jumped     VERB     VBD    verb, past tense
over       ADP      IN     conjunction, subordinating or preposition
the        DET      DT     determiner
lazy       ADJ      JJ     adjective (English), other noun-modifier (Chinese)
dog        NOUN     NN     noun, singular or mass
's         PART     POS    possessive ending
back       NOUN     NN     noun, singular or mass
.          PUNCT    .      punctuation mark, sentence closer


In [6]:
doc = nlp(u'I read books on NLP.')
r = doc[1]
print(f'{r.text:{10}} {r.pos_:{8}} {r.tag_:{6}} {spacy.explain(r.tag_)}')

read       VERB     VBP    verb, non-3rd person singular present


In [7]:
doc = nlp(u'I read a book on NLP.')
r = doc[1]
print(f'{r.text:{10}} {r.pos_:{8}} {r.tag_:{6}} {spacy.explain(r.tag_)}')

read       VERB     VBD    verb, past tense


# Counting pos tags

In [8]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")
# Count the frequencies of different coarse-grained pos tags:
POS_counts = doc.count_by(spacy.attrs.POS)
POS_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [9]:
for k,v in sorted(POS_counts.items()):
    print(f'{k}. {doc.vocab[k].text:{4}}: {v}')

84. ADJ : 3
85. ADP : 1
90. DET : 2
92. NOUN: 3
94. PART: 1
97. PUNCT: 1
100. VERB: 1


In [10]:
# Count the difference fine-grained tags
DEP_counts = doc.count_by(spacy.attrs.DEP)

for k,v in sorted(DEP_counts.items()):
    print(f'{k}. {doc.vocab[k].text:{4}}: {v}')

402. amod: 3
415. det : 2
429. nsubj: 1
439. pobj: 1
440. poss: 1
443. prep: 1
445. punct: 1
8110129090154140942. case: 1
8206900633647566924. ROOT: 1


In [11]:
# Count the different dependences
DEP_counts = doc.count_by(spacy.attrs.DEP)

for k,v in sorted(DEP_counts.items()):
    print(f'{k}. {doc.vocab[k].text:{4}}: {v}')

402. amod: 3
415. det : 2
429. nsubj: 1
439. pobj: 1
440. poss: 1
443. prep: 1
445. punct: 1
8110129090154140942. case: 1
8206900633647566924. ROOT: 1


# Visualizing Parts of Speech

In [12]:
import spacy
nlp = spacy.load('en_core_web_sm')
from spacy import displacy

In [13]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

In [14]:
displacy.render(doc, style='dep', jupyter=True, options={'distance':110})

In [16]:
for token in doc:
    print(f'{token.text:{10}} {token.pos_:{7}} {token.dep_:{7}} {spacy.explain(token.dep_)}')

The        DET     det     determiner
quick      ADJ     amod    adjectival modifier
brown      ADJ     amod    adjectival modifier
fox        NOUN    nsubj   nominal subject
jumped     VERB    ROOT    root
over       ADP     prep    prepositional modifier
the        DET     det     determiner
lazy       ADJ     amod    adjectival modifier
dog        NOUN    poss    possession modifier
's         PART    case    case marking
back       NOUN    pobj    object of preposition
.          PUNCT   punct   punctuation


In [None]:
displacy.serve(doc, style='dep', options={'distance':110})




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

