***
Let us look at how we can do stemming using the NLTK package.
Reference: https://www.nltk.org/howto/stem.html
***

In [1]:
import nltk


In [None]:
# One-time setup, left disabled: uncomment to fetch NLTK data.
# NOTE(review): later cells use word_tokenize and WordNetLemmatizer, which presumably
# need NLTK corpora to be present locally — confirm which ones on a fresh environment.
#nltk.download()

In [2]:
from nltk.stem import PorterStemmer 
from nltk.tokenize import word_tokenize 

In [3]:
# Create the Porter stemmer (reused by later cells) and stem a few cat-related words.
porter_stemmer = PorterStemmer()
words = ["cats", "catty", "catlike", "kittens"]

for token in words:
    stem = porter_stemmer.stem(token)
    print(token, " : ", stem)

cats  :  cat
catty  :  catti
catlike  :  catlik
kittens  :  kitten


In [4]:
# Two related word forms; print each next to its Porter stem.
words = ["computer", "compute"]
for token in words:
    stem = porter_stemmer.stem(token)
    print(token, " : ", stem)

computer  :  comput
compute  :  comput


In [5]:
# Porter stems for three "program" variants, printed next to the original word.
words = ["programmer", "programme", "programs"]
for token in words:
    stem = porter_stemmer.stem(token)
    print(token, " : ", stem)

programmer  :  programm
programme  :  programm
programs  :  program


In [6]:
from nltk.stem.snowball import SnowballStemmer

# Build an English Snowball stemmer (reused by the next cell) and run it on the
# same cat-related words that were given to the Porter stemmer.
snowball_stemmer = SnowballStemmer("english")
words = ["cats", "catty", "catlike", "kittens"]

for token in words:
    stem = snowball_stemmer.stem(token)
    print(token, " : ", stem)


cats  :  cat
catty  :  catti
catlike  :  catlik
kittens  :  kitten


In [7]:
# Snowball stems for the same "program" variants used with the Porter stemmer.
words = ["programmer", "programme", "programs"]
for token in words:
    stem = snowball_stemmer.stem(token)
    print(token, " : ", stem)

programmer  :  programm
programme  :  programm
programs  :  program


***
Let us now look at lemmatization using NLTK
***

In [11]:
from nltk.stem import WordNetLemmatizer

In [20]:
# Create the WordNet lemmatizer (reused by later cells) and lemmatize the
# "program" variants; lemmatize() is called without a part-of-speech argument.
wnl = WordNetLemmatizer()
words = ["programmer", "programme", "programs"]
for token in words:
    lemma = wnl.lemmatize(token)
    print(token, " : ", lemma)

programmer  :  programmer
programme  :  programme
programs  :  program


In [21]:
# Lemmatize the cat-related words with the WordNet lemmatizer for comparison
# with the stemmer cells.
words = ["cats", "catty", "catlike", "kittens"]

for token in words:
    lemma = wnl.lemmatize(token)
    print(token, " : ", lemma)

cats  :  cat
catty  :  catty
catlike  :  catlike
kittens  :  kitten


In [22]:
# Lemmatize comparative/superlative forms. No `pos` argument is passed, so the
# lemmatizer's default part of speech applies; the recorded output shows all
# three words coming back unchanged.
# NOTE(review): passing an adjective POS tag would likely be needed to map
# "better" to "good" — confirm against the NLTK docs before changing.
words = ["better", "best", "good"]

for token in words:
    lemma = wnl.lemmatize(token)
    print(token, " : ", lemma)

better  :  better
best  :  best
good  :  good


In [23]:
# Tokenize a sentence and print, per token: the token, its Porter stem, and
# its WordNet lemma.
sentence = "Programmers use programming languages to write computer programs"
words = word_tokenize(sentence)

for token in words:
    stem = porter_stemmer.stem(token)
    lemma = wnl.lemmatize(token)
    print(token, " : ", stem, ":", lemma)

Programmers  :  programm : Programmers
use  :  use : use
programming  :  program : programming
languages  :  languag : language
to  :  to : to
write  :  write : write
computer  :  comput : computer
programs  :  program : program


In [24]:
# Same stem-vs-lemma comparison on a sentence containing a capitalized title
# and a possessive form.
sentence = "Cars is a good movie with many animated car's"
words = word_tokenize(sentence)

for token in words:
    stem = porter_stemmer.stem(token)
    lemma = wnl.lemmatize(token)
    print(token, " : ", stem, ":", lemma)

Cars  :  car : Cars
is  :  is : is
a  :  a : a
good  :  good : good
movie  :  movi : movie
with  :  with : with
many  :  mani : many
animated  :  anim : animated
car  :  car : car
's  :  's : 's


In [25]:
# Stem-vs-lemma comparison on a longer, two-sentence input that mixes several
# forms of "meet" and "let".
sentence = "Lets meet today. In this meeting let us continue to discuss on what we left during the last time i met you"
words = word_tokenize(sentence)

for token in words:
    stem = porter_stemmer.stem(token)
    lemma = wnl.lemmatize(token)
    print(token, " : ", stem, ":", lemma)

Lets  :  let : Lets
meet  :  meet : meet
today  :  today : today
.  :  . : .
In  :  In : In
this  :  thi : this
meeting  :  meet : meeting
let  :  let : let
us  :  us : u
continue  :  continu : continue
to  :  to : to
discuss  :  discuss : discus
on  :  on : on
what  :  what : what
we  :  we : we
left  :  left : left
during  :  dure : during
the  :  the : the
last  :  last : last
time  :  time : time
i  :  i : i
met  :  met : met
you  :  you : you


***
Let us do lemmatization with spaCy
***

In [18]:
import spacy
# Load the spaCy pipeline named 'en_core_web_md'; `nlp` is reused by the cells below.
# NOTE(review): this presumably requires the model package to be installed beforehand
# (e.g. `python -m spacy download en_core_web_md`) — confirm on a fresh environment.
nlp = spacy.load('en_core_web_md')



In [27]:
# Run the spaCy pipeline over the sentence and print each token's text with
# the lemma spaCy assigned to it.
sentence = "Programmers use programming languages to write computer programs"
doc = nlp(sentence)
for token in doc:
    label = token.text + ':'
    print(label, token.lemma_)

Programmers: programmer
use: use
programming: programming
languages: language
to: to
write: write
computer: computer
programs: program


In [28]:
# The cat-related words from the NLTK cells, joined into one string so spaCy
# can process them as a single document.
sentence = "cats catty catlike kittens"
doc = nlp(sentence)
for token in doc:
    label = token.text + ':'
    print(label, token.lemma_)

cats: cat
catty: catty
catlike: catlike
kittens: kitten


In [29]:
# spaCy lemmas for the "Cars" sentence used earlier with NLTK.
sentence = "Cars is a good movie with many animated car's"
doc = nlp(sentence)
for token in doc:
    label = token.text + ':'
    print(label, token.lemma_)

Cars: car
is: be
a: a
good: good
movie: movie
with: with
many: many
animated: animate
car: car
's: 's


In [30]:
# spaCy lemmas for the two-sentence "meeting" input used earlier with NLTK.
sentence = "Lets meet today. In this meeting let us continue to discuss on what we left during the last time i met you"
doc = nlp(sentence)
for token in doc:
    label = token.text + ':'
    print(label, token.lemma_)

Lets: let
meet: meet
today: today
.: .
In: in
this: this
meeting: meeting
let: let
us: -PRON-
continue: continue
to: to
discuss: discuss
on: on
what: what
we: -PRON-
left: leave
during: during
the: the
last: last
time: time
i: i
met: meet
you: -PRON-
