# Stemming in NLTK

In [6]:
import nltk
import spacy

In [7]:
# NOTE(review): SnowballStemmer is imported but not used in the cells visible here.
from nltk.stem import PorterStemmer, SnowballStemmer

# Shared Porter stemmer instance; the following cell calls stemmer.stem() on each sample word.
stemmer = PorterStemmer()


In [8]:
# Inflected and derived forms to push through the stemmer.
words = [
    "eating", "eats", "eat", "ate",
    "adjustable", "rafting", "ability", "meeting",
]
# Print each word next to its stem, one pair per line.
for word in words:
    stem = stemmer.stem(word)
    print(word, "| ", stem)

eating |  eat
eats |  eat
eat |  eat
ate |  ate
adjustable |  adjust
rafting |  raft
ability |  abil
meeting |  meet


# Lemmatization using spaCy

In [9]:
# Load the "en_core_web_sm" spaCy pipeline; `nlp` is reused by the cells that follow.
nlp = spacy.load("en_core_web_sm")

In [10]:
# Process one space-separated string of sample words and show each token's lemma.
sample_text = "eating eats eat ate adjustable rafting ability meeting better"
doc = nlp(sample_text)
for token in doc:
    lemma = token.lemma_
    print(token.text, "| ", lemma)

eating |  eat
eats |  eat
eat |  eat
ate |  eat
adjustable |  adjustable
rafting |  raft
ability |  ability
meeting |  meeting
better |  well


# Customizing lemmatizer

In [13]:
# Display the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [17]:
# Fetch the pipeline's attribute_ruler component and register a rule that
# assigns the lemma "Brother" to tokens whose exact text is "Bro" or "Brah".
# NOTE(review): re-running this cell calls ar.add() again on the same `nlp`
# object — presumably this appends a duplicate rule; confirm before relying on it.
ar = nlp.get_pipe("attribute_ruler")
slang_patterns = [[{"TEXT": "Bro"}], [{"TEXT": "Brah"}]]
lemma_override = {"LEMMA": "Brother"}
ar.add(slang_patterns, lemma_override)

# Re-run the pipeline on a sentence containing both slang forms and list the lemmas.
doc = nlp("Bro, you wanna go? Brah, don't say no! I am exhausted")
for token in doc:
    lemma = token.lemma_
    print(token.text, "| ", lemma)

Bro |  Brother
, |  ,
you |  you
wanna |  wanna
go |  go
? |  ?
Brah |  Brother
, |  ,
do |  do
n't |  not
say |  say
no |  no
! |  !
I |  I
am |  be
exhausted |  exhaust


In [19]:
# Display the first token of the most recently processed doc.
doc[0]

Bro

In [21]:
# Display the lemma string of the first token of the most recently processed doc.
doc[0].lemma_

'Brother'