# POS Tagging
Program to assign POS (part-of-speech) tags to a loaded text file using WordNet

In [2]:
# Setup cell: import NLTK and fetch the data packages requested by name below.
import nltk
# Presumably the tokenizer model used by word_tokenize -- TODO confirm against
# the installed NLTK version's resource names.
nltk.download('punkt')
# Presumably the model used by pos_tag -- TODO confirm as above.
nltk.download('averaged_perceptron_tagger')
# Imported after the download calls, as in the original cell.
from nltk import pos_tag, word_tokenize

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\JYOTI\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\JYOTI\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [3]:
# Demo: tag one hard-coded sentence with NLTK's pos_tag.
sentence = "This is a demo text by Jyoti Maurya for testing POS tag assignment."
# Split the sentence into tokens before tagging; pos_tag is given the token
# list, not the raw string.
tokenized_sentence = word_tokenize(sentence)
# Prints a list of (token, tag) pairs.
print(pos_tag(tokenized_sentence))

[('This', 'DT'), ('is', 'VBZ'), ('a', 'DT'), ('demo', 'JJ'), ('text', 'NN'), ('by', 'IN'), ('Jyoti', 'NNP'), ('Maurya', 'NNP'), ('for', 'IN'), ('testing', 'VBG'), ('POS', 'NNP'), ('tag', 'JJ'), ('assignment', 'NN'), ('.', '.')]


In [4]:
from nltk.tag import SequentialBackoffTagger
from nltk.corpus import wordnet
from nltk.probability import FreqDist

class WordNetTagger(SequentialBackoffTagger):
    """Backoff tagger that derives a tag from a word's WordNet synsets.

    For each token, the part-of-speech code of every synset WordNet returns
    for the word is counted, and the most frequent code is translated to a
    tag through ``wordnet_tag_map``.

    Positional and keyword arguments are forwarded unchanged to
    ``SequentialBackoffTagger.__init__`` (e.g. ``backoff=other_tagger``).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # WordNet POS code -> output tag. Both adjective codes ('a' and the
        # satellite-adjective code 's') collapse to 'JJ'.
        self.wordnet_tag_map = {
            'n': 'NN',
            's': 'JJ',
            'a': 'JJ',
            'r': 'RB',
            'v': 'VB',
        }

    def choose_tag(self, tokens, index, history):
        """Return the tag for ``tokens[index]``, or ``None`` if undecidable.

        Args:
            tokens: The sequence of words being tagged.
            index: Position in ``tokens`` of the word to tag.
            history: Tags already assigned to earlier tokens (unused here).

        Returns:
            The tag mapped from the word's most frequent WordNet POS code,
            or ``None`` when WordNet has no synsets for the word or the code
            is not in ``wordnet_tag_map``.
        """
        word = tokens[index]
        fd = FreqDist(synset.pos() for synset in wordnet.synsets(word))

        # FreqDist.max() raises ValueError on an empty distribution, which
        # happens for any word WordNet does not know (punctuation, many
        # proper nouns). Returning None instead lets the sequential backoff
        # machinery fall through to the next tagger.
        if not fd:
            return None
        return self.wordnet_tag_map.get(fd.max())

In [6]:
# Usage cell: tag a short token list with the WordNet-based tagger.
import nltk
# The WordNet corpus download is left disabled here; presumably the corpus
# was already installed locally -- enable the line below if it is missing.
#nltk.download('wordnet')
# No backoff tagger is supplied, so WordNetTagger is the only tagger consulted.
wt = WordNetTagger()
# Bare expression: the notebook displays the returned list of (token, tag) pairs.
wt.tag(['Food', 'is', 'best'])

[('Food', 'NN'), ('is', 'VB'), ('best', 'JJ')]