# POS Tagging Basics

In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' pipeline once; every Doc below is created by calling `nlp` on text.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Run the pipeline on a sample sentence; the resulting Doc holds the tokens inspected below.
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

In [4]:
# A bare expression on the last line makes the notebook display the Doc (shown as its text).
doc

Apple is looking at buying U.K. startup for $1 billion

In [5]:
# Indexing a Doc returns a single token; indices are 0-based, so this is the third token.
print(doc[2])

looking


In [6]:
# `pos_` is the coarse-grained part-of-speech tag of the token, as a string.
print(doc[2].pos_)

VERB


In [7]:
# `tag_` is the fine-grained part-of-speech tag of the same token, as a string.
print(doc[2].tag_)

# spaCy documentation on linguistic features: https://spacy.io/usage/linguistic-features/

VBG


In [8]:
# Print an aligned table with one row per token:
# text, lemma, coarse-grained POS, fine-grained tag, and a plain-English description of the tag.
# The number after each colon in the f-string is the minimum width of that column.
for token in doc:
    print(f'{token.text:10} {token.lemma_:8} {token.pos_:8} {token.tag_:6} {spacy.explain(token.tag_)}')

Apple      Apple    PROPN    NNP    noun, proper singular
is         be       AUX      VBZ    verb, 3rd person singular present
looking    look     VERB     VBG    verb, gerund or present participle
at         at       ADP      IN     conjunction, subordinating or preposition
buying     buy      VERB     VBG    verb, gerund or present participle
U.K.       U.K.     PROPN    NNP    noun, proper singular
startup    startup  NOUN     NN     noun, singular or mass
for        for      ADP      IN     conjunction, subordinating or preposition
$          $        SYM      $      symbol, currency
1          1        NUM      CD     cardinal number
billion    billion  NUM      CD     cardinal number


# 2. Counting POS tags

The **doc.count_by()** method accepts a specific token attribute as its argument and returns a dictionary that maps each value of that attribute to the number of times it occurs in the Doc.

In [9]:
# Re-create the Doc from the same sample sentence used in the previous section.
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

In [10]:
# Count the frequencies of the different coarse-grained POS tags.
# `spacy.attrs.POS` selects the attribute to count by; the result is keyed by
# integer POS IDs rather than tag names (see the printed dictionary below).
POS_counts = doc.count_by(spacy.attrs.POS) # `attrs` is short for "attributes"

In [11]:
print(POS_counts)
# The output is a dictionary of {integer POS ID: number of tokens with that POS}.

{96: 2, 87: 1, 100: 2, 85: 2, 92: 1, 99: 1, 93: 2}


In [19]:
# List every POS ID together with its decoded tag name and its frequency.
# `doc.vocab[<id>].text` turns the integer ID back into the readable tag string.
for pos_id, count in doc.count_by(spacy.attrs.POS).items():
    tag_name = doc.vocab[pos_id].text
    print(pos_id, tag_name, count)

96 PROPN 2
87 AUX 1
100 VERB 2
85 ADP 2
92 NOUN 1
99 SYM 1
93 NUM 2


In [12]:
# Decode a single POS ID back to its tag name via the vocabulary.
# 96 is the ID reported for PROPN in the counts printed above.
# NOTE(review): the numeric ID presumably depends on the installed spaCy version —
# confirm before relying on the literal 96 elsewhere.
doc.vocab[96].text

'PROPN'

In [13]:
# Check the POS of an individual token: `pos_` gives the tag name directly,
# so no ID decoding is needed (here for the first token of the Doc).
doc[0].pos_

'PROPN'

# 3. Visualizing POS tags

In [14]:
from spacy import displacy

In [15]:
# Create a simple Doc object to visualize (same sample sentence as in the earlier sections).
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

In [16]:
# Draw the dependency parse of `doc` inline in the notebook.
# `jupyter=True` requests notebook rendering; the `distance` option controls
# the horizontal spacing between words in the diagram.
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={'distance': 80},
)