In [1]:
# Part-of-Speech Tagging

# O POS Tagging é o processo de rotulação de elementos textuais - tipicamente palavras e pontuação - com o fim de evidenciar 
# a estrutura gramatical de um determinado trecho de texto. 
# Em reconhecimento e síntese de fala, seu uso é útil para extração de termos, desambiguação, composição de novas frases e pesquisa lexicográfica.

import nltk
from nltk.tag import pos_tag
from nltk.tokenize import sent_tokenize, word_tokenize

# Texto
frase = "Time to start with natural language processing. Python will make our life easier!"

# Tokenization em sentenças
sent_tokens = sent_tokenize(frase)
print(sent_tokens)

['Time to start with natural language processing.', 'Python will make our life easier!']


In [2]:
# Tokenization em palavras
word_tokens = word_tokenize(frase)
print(word_tokens)


['Time', 'to', 'start', 'with', 'natural', 'language', 'processing', '.', 'Python', 'will', 'make', 'our', 'life', 'easier', '!']


In [3]:
# Aplicando pos_tag aos tokens
tags = pos_tag(word_tokens)

# Print na tela
print(tags)

[('Time', 'NNP'), ('to', 'TO'), ('start', 'VB'), ('with', 'IN'), ('natural', 'JJ'), ('language', 'NN'), ('processing', 'NN'), ('.', '.'), ('Python', 'NNP'), ('will', 'MD'), ('make', 'VB'), ('our', 'PRP$'), ('life', 'NN'), ('easier', 'JJR'), ('!', '.')]


In [4]:
# Visualizando o significado de cada código do POS Tag.
# Nesse caso, visualizando VB
nltk.help.upenn_tagset('VB')

# Definição para cada definição de código
list_of_tags = []
for pair in tags:
    list_of_tags.append(pair[1])
list_of_tags = list(set(list_of_tags))
list_of_tags

VB: verb, base form
    ask assemble assess assign assume atone attention avoid bake balkanize
    bank begin behold believe bend benefit bevel beware bless boil bomb
    boost brace break bring broil brush build ...


['MD', 'VB', 'NN', 'JJR', 'JJ', 'TO', '.', 'PRP$', 'IN', 'NNP']

In [5]:
for pos in list_of_tags:
    print(nltk.help.upenn_tagset(pos))

MD: modal auxiliary
    can cannot could couldn't dare may might must need ought shall should
    shouldn't will would
None
VB: verb, base form
    ask assemble assess assign assume atone attention avoid bake balkanize
    bank begin behold believe bend benefit bevel beware bless boil bomb
    boost brace break bring broil brush build ...
None
NN: noun, common, singular or mass
    common-carrier cabbage knuckle-duster Casino afghan shed thermostat
    investment slide humour falloff slick wind hyena override subhumanity
    machinist ...
None
JJR: adjective, comparative
    bleaker braver breezier briefer brighter brisker broader bumper busier
    calmer cheaper choosier cleaner clearer closer colder commoner costlier
    cozier creamier crunchier cuter ...
None
JJ: adjective or numeral, ordinal
    third ill-mannered pre-war regrettable oiled calamitous first separable
    ectoplasmic battery-powered participatory fourth still-to-be-named
    multilingual multi-disciplinary ...
None
