## Parts Of Speech Tags

In [17]:
import nltk
nltk.download('tagsets_json')
nltk.help.upenn_tagset()

[nltk_data] Downloading package tagsets_json to
[nltk_data]     /Users/saivarsha/nltk_data...


$: dollar
    $ -$ --$ A$ C$ HK$ M$ NZ$ S$ U.S.$ US$
'': closing quotation mark
    ' ''
(: opening parenthesis
    ( [ {
): closing parenthesis
    ) ] }
,: comma
    ,
--: dash
    --
.: sentence terminator
    . ! ?
:: colon or ellipsis
    : ; ...
CC: conjunction, coordinating
    & 'n and both but either et for less minus neither nor or plus so
    therefore times v. versus vs. whether yet
CD: numeral, cardinal
    mid-1890 nine-thirty forty-two one-tenth ten million 0.5 one forty-
    seven 1987 twenty '79 zero two 78-degrees eighty-four IX '60s .025
    fifteen 271,124 dozen quintillion DM2,000 ...
DT: determiner
    all an another any both del each either every half la many much nary
    neither no some such that the them these this those
EX: existential there
    there
FW: foreign word
    gemeinschaft hund ich jeux habeas Haementeria Herr K'ang-si vous
    lutihaw alai je jour objets salutaris fille quibusdam pas trop Monte
    terram fiche oui corporis ...
IN: preposition or

[nltk_data]   Unzipping help/tagsets_json.zip.


In [1]:
# Dr. APJ Abdul Kalam's "Three Visions for India" Speech excerpt
paragraph = """
I have three visions for India. In 3000 years of our history, people from all over the world 
have come and invaded us, captured our lands, conquered our minds. From Alexander onwards, 
the Greeks, the Turks, the Moguls, the Portuguese, the British, the French, the Dutch, 
all of them came and looted us, took over what was ours. Yet we have not done this to any 
other nation. We have not conquered anyone. We have not grabbed their land, their culture, 
their history and tried to enforce our way of life on them. Why? Because we respect the 
freedom of others. That is why my first vision is that of freedom. I believe that India 
got its first vision of this in 1857, when we started the war of Independence. It is this 
freedom that we must protect and nurture and build on. If we are not free, no one will 
respect us. My second vision for India is development. For fifty years we have been a 
developing nation. It is time we see ourselves as a developed nation. We are among top 
five nations of the world in terms of GDP. We have 10 percent growth rate in most areas. 
Our poverty levels are falling. Our achievements are being globally recognized today. 
Yet we lack the self-confidence to see ourselves as a developed nation, self-reliant and 
self-assured. Isn't this incorrect? I have a third vision. India must stand up to the 
world. Because I believe that unless India stands up to the world, no one will respect us. 
Only strength respects strength. We must be strong not only as a military power but also 
as an economic power. Both must go hand-in-hand.
"""

In [2]:
import nltk
sentences = nltk.sent_tokenize(paragraph)

In [3]:
sentences

['\nI have three visions for India.',
 'In 3000 years of our history, people from all over the world \nhave come and invaded us, captured our lands, conquered our minds.',
 'From Alexander onwards, \nthe Greeks, the Turks, the Moguls, the Portuguese, the British, the French, the Dutch, \nall of them came and looted us, took over what was ours.',
 'Yet we have not done this to any \nother nation.',
 'We have not conquered anyone.',
 'We have not grabbed their land, their culture, \ntheir history and tried to enforce our way of life on them.',
 'Why?',
 'Because we respect the \nfreedom of others.',
 'That is why my first vision is that of freedom.',
 'I believe that India \ngot its first vision of this in 1857, when we started the war of Independence.',
 'It is this \nfreedom that we must protect and nurture and build on.',
 'If we are not free, no one will \nrespect us.',
 'My second vision for India is development.',
 'For fifty years we have been a \ndeveloping nation.',
 'It is time

In [14]:
import nltk
from nltk.corpus import stopwords

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to /Users/saivarsha/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/saivarsha/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/saivarsha/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /Users/saivarsha/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [15]:
# we'll find the POS Tag

for i in range(len(sentences)):
    words = nltk.word_tokenize(sentences[i])
    words = [word for word in words if word not in set(stopwords.words('english'))]
    pos_tag = nltk.pos_tag(words)
    print(pos_tag)

[('I', 'PRP'), ('three', 'CD'), ('visions', 'NNS'), ('India', 'NNP'), ('.', '.')]
[('In', 'IN'), ('3000', 'CD'), ('years', 'NNS'), ('history', 'NN'), (',', ','), ('people', 'NNS'), ('world', 'NN'), ('come', 'VBP'), ('invaded', 'VBN'), ('us', 'PRP'), (',', ','), ('captured', 'VBD'), ('lands', 'NNS'), (',', ','), ('conquered', 'VBD'), ('minds', 'NNS'), ('.', '.')]
[('From', 'IN'), ('Alexander', 'NNP'), ('onwards', 'NNS'), (',', ','), ('Greeks', 'NNP'), (',', ','), ('Turks', 'NNP'), (',', ','), ('Moguls', 'NNP'), (',', ','), ('Portuguese', 'NNP'), (',', ','), ('British', 'NNP'), (',', ','), ('French', 'NNP'), (',', ','), ('Dutch', 'NNP'), (',', ','), ('came', 'VBD'), ('looted', 'JJ'), ('us', 'PRP'), (',', ','), ('took', 'VBD'), ('.', '.')]
[('Yet', 'RB'), ('done', 'VBN'), ('nation', 'NN'), ('.', '.')]
[('We', 'PRP'), ('conquered', 'VBD'), ('anyone', 'NN'), ('.', '.')]
[('We', 'PRP'), ('grabbed', 'VBD'), ('land', 'NN'), (',', ','), ('culture', 'NN'), (',', ','), ('history', 'NN'), ('tried'

In [21]:
sentence = "Taj Mahal is a beautiful Monument"
words = nltk.word_tokenize(sentence)
print(nltk.pos_tag(words))

[('Taj', 'NNP'), ('Mahal', 'NNP'), ('is', 'VBZ'), ('a', 'DT'), ('beautiful', 'JJ'), ('Monument', 'NN')]


In [25]:
for word in "Taj Mahal is a beautiful Monument".split():
    print(nltk.pos_tag(word))

import nltk  # Make sure nltk is imported

sentence = "Taj Mahal is a beautiful Monument"
words = sentence.split()

# Method 1: Process the whole sentence at once
print(nltk.pos_tag(words))

# Method 2: If you want to process each word separately
for word in words:
    # Convert single word to a list containing that word
    print(nltk.pos_tag([word]))

TypeError: tokens: expected a list of strings, got a string