## Lemmatizer Examples

In [1]:
import nltk
#nltk.download('wordnet')
#nltk.download('punkt')
#nltk.download('averaged_perceptron_tagger')
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

#### Simple lemmatization

In [2]:
# Create one lemmatizer instance and reuse it in the following cells.
# No POS argument is given here; "stripes" comes back as 'stripe'.
lemmatizer = WordNetLemmatizer()
lemmatizer.lemmatize("stripes")

'stripe'

In [3]:
# The second argument is the part of speech: 'v' lemmatizes the word as a verb.
lemmatizer.lemmatize('stripes', 'v')

'strip'

#### Simple PoS tagging

In [4]:
# pos_tag takes a list of tokens, so a single word is wrapped in a list;
# it returns a list of (token, tag) pairs.
nltk.pos_tag(['feet'])

[('feet', 'NNS')]

#### Sentence PoS tagging

In [5]:
#sentence = "The striped bats are hanging on their feet for best"
sentence = "They're hanging fast on their feet for best"
# Tokenize the sentence first, then tag the resulting token list.
tokens = nltk.word_tokenize(sentence)
tagged = nltk.pos_tag(tokens)
tagged

[('They', 'PRP'),
 ("'re", 'VBP'),
 ('hanging', 'VBG'),
 ('fast', 'RB'),
 ('on', 'IN'),
 ('their', 'PRP$'),
 ('feet', 'NNS'),
 ('for', 'IN'),
 ('best', 'JJS')]

#### pos_tag (Penn Treebank) tags to WordNetLemmatizer nomenclature

In [6]:
def rewrite_lemmatizer_pos(tag):
    """Rewrite a POS tag to one that lemmatize() accepts.

    Only the first letter of the tag is inspected (case-insensitively), so
    for example "NN" and "NNS" both map to wordnet.NOUN and "VBG" maps to
    wordnet.VERB.

    Args:
        tag: POS tag string such as "JJ", "NNS", "VBG" or "RB". An empty
            string or None is treated as an unknown tag.

    Returns:
        One of wordnet.ADJ, wordnet.NOUN, wordnet.VERB or wordnet.ADV.
        Tags whose first letter is not J, N, V or R fall back to
        wordnet.NOUN.
    """
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}
    # An empty or missing tag has no first letter to inspect; indexing it
    # would raise, so give it the same fallback as any other unknown tag.
    if not tag:
        return wordnet.NOUN
    # noun by default
    return tag_dict.get(tag[0].upper(), wordnet.NOUN)

In [7]:
# A tag starting with "J" maps to wordnet.ADJ.
rewrite_lemmatizer_pos("JJ")

'a'

In [8]:
# A tag starting with "R" maps to wordnet.ADV.
rewrite_lemmatizer_pos("RB")

'r'

In [9]:
# Map every tag from `tagged` to its lemmatizer POS once, then lemmatize each
# token with that POS and keep the POS alongside the lemma.
wordnet_tagged = [(token, rewrite_lemmatizer_pos(pos)) for token, pos in tagged]
lemmatized = [(lemmatizer.lemmatize(token, wn_pos), wn_pos)
              for token, wn_pos in wordnet_tagged]
lemmatized

[('They', 'n'),
 ("'re", 'v'),
 ('hang', 'v'),
 ('fast', 'r'),
 ('on', 'n'),
 ('their', 'n'),
 ('foot', 'n'),
 ('for', 'n'),
 ('best', 'a')]

In [77]:
# Keep only the tokens whose mapped POS is noun, and lemmatize them.
# NOTE: rewrite_lemmatizer_pos falls back to noun for every tag it does not
# recognize, so tokens tagged e.g. PRP or IN pass this filter as well.
noun_candidates = [(token, rewrite_lemmatizer_pos(pos)) for token, pos in tagged]
lemmatized_nouns = [(lemmatizer.lemmatize(token, wn_pos), wn_pos)
                    for token, wn_pos in noun_candidates
                    if wn_pos == wordnet.NOUN]
lemmatized_nouns

[('They', 'n'), ('on', 'n'), ('their', 'n'), ('foot', 'n'), ('for', 'n')]