# Part of Speech Tagging

Part-of-speech (POS) tagging in Natural Language Processing (NLP) is the process of labeling each word in a sentence with its grammatical category, or "part of speech," such as noun, verb, adjective, or adverb. This helps in understanding the role each word plays in a sentence and how it relates to the other words.

In [1]:
# import necessary packages

import nltk
from nltk.tokenize import sent_tokenize

# Download the 'punkt' tokenizer data (unzipped under tokenizers/), used for
# the sentence splitting done later in this notebook.
# As the last expression in the cell, the call's return value is displayed.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [2]:
# Sample paragraph used as the input text for the rest of the notebook.
# Every source line ends with a newline; the third line holds two sentences.
corpus = (
    "On a sunny afternoon, the children eagerly gathered in the park to play their favorite games.\n"
    "Sarah, the youngest, carried a bright red ball, while her brother John rode his new bicycle around the winding paths.\n"
    "The air was filled with laughter as they raced each other across the grassy fields. Nearby, their parents sat on a bench, watching them with smiles and occasionally waving.\n"
    "As the sun began to set, the sky turned a beautiful shade of orange and pink.\n"
    "The wind picked up slightly, rustling the leaves of the trees, and the children knew it was almost time to go home.\n"
    "Reluctantly, they packed up their toys and promised to return the next day for more fun.\n"
)

In [3]:
# Split the corpus into a list of sentence strings.
sentences = sent_tokenize(corpus)
# Bare expression so the notebook displays the resulting list.
sentences

['On a sunny afternoon, the children eagerly gathered in the park to play their favorite games.',
 'Sarah, the youngest, carried a bright red ball, while her brother John rode his new bicycle around the winding paths.',
 'The air was filled with laughter as they raced each other across the grassy fields.',
 'Nearby, their parents sat on a bench, watching them with smiles and occasionally waving.',
 'As the sun began to set, the sky turned a beautiful shade of orange and pink.',
 'The wind picked up slightly, rustling the leaves of the trees, and the children knew it was almost time to go home.',
 'Reluctantly, they packed up their toys and promised to return the next day for more fun.']

In [5]:
## Find the POS tag of each word

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Fetch the NLTK resources this cell relies on, in order: the stop-word lists
# and the averaged-perceptron tagger model.
for resource in ('stopwords', 'averaged_perceptron_tagger'):
  nltk.download(resource)

# Build the English stop-word lookup once. Previously the set was rebuilt
# inside the comprehension condition, i.e. once per token of every sentence.
english_stopwords = set(stopwords.words('english'))

for sentence in sentences:
  # Split the sentence into word and punctuation tokens.
  words = word_tokenize(sentence)
  # Drop stop words. The match is case-sensitive, so capitalised tokens such
  # as 'On', 'The' and 'As' are kept (visible in the printed output).
  words = [word for word in words if word not in english_stopwords]
  # Tag the remaining tokens; the result is a list of (token, tag) pairs.
  # NOTE(review): tagging after stop-word removal strips context the tagger may
  # rely on (e.g. 'play' comes out as NN here) -- consider tagging the full
  # sentence first and filtering afterwards.
  pos_tags = nltk.pos_tag(words)
  print(pos_tags)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


[('On', 'IN'), ('sunny', 'JJ'), ('afternoon', 'NN'), (',', ','), ('children', 'NNS'), ('eagerly', 'RB'), ('gathered', 'VBN'), ('park', 'NN'), ('play', 'NN'), ('favorite', 'JJ'), ('games', 'NNS'), ('.', '.')]
[('Sarah', 'NNP'), (',', ','), ('youngest', 'JJS'), (',', ','), ('carried', 'VBD'), ('bright', 'JJ'), ('red', 'JJ'), ('ball', 'NN'), (',', ','), ('brother', 'NN'), ('John', 'NNP'), ('rode', 'VBD'), ('new', 'JJ'), ('bicycle', 'NN'), ('around', 'IN'), ('winding', 'VBG'), ('paths', 'NNS'), ('.', '.')]
[('The', 'DT'), ('air', 'NN'), ('filled', 'VBD'), ('laughter', 'NN'), ('raced', 'VBN'), ('across', 'IN'), ('grassy', 'JJ'), ('fields', 'NNS'), ('.', '.')]
[('Nearby', 'RB'), (',', ','), ('parents', 'NNS'), ('sat', 'VBD'), ('bench', 'NN'), (',', ','), ('watching', 'VBG'), ('smiles', 'NNS'), ('occasionally', 'RB'), ('waving', 'VBG'), ('.', '.')]
[('As', 'IN'), ('sun', 'NN'), ('began', 'VBD'), ('set', 'VBN'), (',', ','), ('sky', 'NN'), ('turned', 'VBD'), ('beautiful', 'JJ'), ('shade', 'JJ')