In [3]:
#Perform standard imports
import spacy
nlp = spacy.load('en_core_web_sm')

In [4]:
#create a simple DOC object
doc = nlp (u"The quick brown fox jumped the lazy dog's back.")

In [5]:
#Print the full text:
print(doc.text)

The quick brown fox jumped the lazy dog's back.


In [6]:
#Print the fifth word and associated tags:
print(doc[4].text, doc[4].pos_, doc[4].tag_, spacy.explain(doc[4].tag_))

jumped VERB VBD verb, past tense


We can apply this technnique to the entire Doc object:

In [7]:
for token in doc:
    print(f'{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}')

The        DET      DT     determiner
quick      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
brown      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
fox        NOUN     NN     noun, singular or mass
jumped     VERB     VBD    verb, past tense
the        DET      DT     determiner
lazy       ADJ      JJ     adjective (English), other noun-modifier (Chinese)
dog        NOUN     NN     noun, singular or mass
's         PART     POS    possessive ending
back       NOUN     NN     noun, singular or mass
.          PUNCT    .      punctuation mark, sentence closer


Counting pos

In [8]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

# Count the frequencies of different caarse-grained POS tags:
POS_counts = doc.count_by(spacy.attrs.POS)
POS_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [9]:
doc = nlp(u'I read books on NLP.')
r = doc[1]

print(f'{r.text:{10}} {r.pos_:{8}} {r.tag_:{6}} {spacy.explain(r.tag_)}')

read       VERB     VBP    verb, non-3rd person singular present


counting the pos tags

Counting POS tags
The doc.count_by() method accepts a specific token attributes as its argument, and returns a frequancy count of the given attributes as a dictionary object keys in the dictionary are the integer vlaue of the given attributesID,  and values are the frequancy. counts of zero are not included

Craete frequency list of POS tags form the entire document

In [10]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

# Count the frequencies of different coarse-grained POS tags:
POS_counts = doc.count_by(spacy.attrs.POS)
POS_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [11]:
doc.vocab[83].text

'LANG'

create a frequancy list of pos

In [13]:
for k,v in sorted(POS_counts.items()):
    print(f'{k}. {doc.vocab[k].text:{5}}: {v}')

84. ADJ  : 3
85. ADP  : 1
90. DET  : 2
92. NOUN : 3
94. PART : 1
97. PUNCT: 1
100. VERB : 1


In [14]:
#Count the different fine-grained tags:
TAG_counts = doc.count_by(spacy.attrs.TAG)

for k,v in sorted (POS_counts.items()):
    print(f'{k}.{doc.vocab[k].text:{4}}: {v}')

84.ADJ : 3
85.ADP : 1
90.DET : 2
92.NOUN: 3
94.PART: 1
97.PUNCT: 1
100.VERB: 1


Visualizing Parts of speech
spaCy offers an outstanding visualizer called displaCy:

In [15]:
#Perform standard imports
import spacy 
nlp = spacy.load('en_core_web_sm')

#import the displaCy library
from spacy import displacy

In [16]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

In [17]:
#Render the dependency parse immediately inside jupyter:
displacy.render(doc, style='dep', jupyter=True, options={'distance':110})

Creating Visualization outside of jupyter
if 

In [None]:
displacy.serve(doc, style='dep', options={'distance':110})




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...



Displacy.server() accepts a single Doc or list of Doc or list of Doc objects. Since large text are difficult to view in one line, you may want to pass a list of spans instead. Each span will appear on its own line:

In [None]:
doc2 = nlp(u"This is a sentence.This is another ,possibly longer sentence.")

# Create spans from Doc.sents:
spans = list(doc2.sents)

displacy.server(spans, style='dep', options={'distance': 110})

For a full list of options visit https://spacy.io/api/top-level#display_options

In [None]:
options = {'distance':110, 'compact':'True', 'color':'yellow', 'bg': '#09a3d5', 'font': 'Times'}
.server(doc, style='dep', options=options)