In [1]:
# While stemming is a common technique in NLP, it is not always the best choice for every task.
# It is often used in conjunction with other techniques, such as lemmatization, to improve
# the accuracy of text analysis.
# Lemmatization is a more advanced technique that takes into account the context of the word
# and its meaning, rather than just removing suffixes.
# This can lead to more accurate results, especially for tasks such as sentiment analysis or
# text classification, where the meaning of the word is important.
# Setup: load NLTK, fetch the "wordnet" package into the local nltk_data
# directory (the download progress is logged by NLTK when this cell runs),
# and bring the WordNet-based lemmatizer class into scope.
import nltk
nltk.download("wordnet")  # network access may be needed the first time this runs
from nltk.stem import WordNetLemmatizer

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/nyiorclement/nltk_data...


In [2]:
# One shared lemmatizer instance, reused by the cells that follow.
lemmatizer = WordNetLemmatizer()

# Inflected and derived forms built on the stem "connect".
connect_tokens = [
    "connecting",
    "connected",
    "connectivity",
    "connect",
    "connects",
]

# Print each token next to its lemma.
# No part-of-speech tag is passed, so lemmatize() uses its default POS (noun);
# verb forms such as "connecting" are therefore printed unchanged.
for token in connect_tokens:
    lemma = lemmatizer.lemmatize(token)
    print(f"{token} : {lemma}")

connecting : connecting
connected : connected
connectivity : connectivity
connect : connect
connects : connects


In [3]:
# Verb forms and noun derivatives built on "learn".
learn_tokens = [
    "learned",
    "learning",
    "learn",
    "learns",
    "learner",
    "learners",
]

# Print each token next to its lemma.
# lemmatize() is called without a POS tag, so every token is looked up with the
# default POS (noun): the plural noun "learners" is reduced to "learner", while
# the verb forms are printed as-is.
for token in learn_tokens:
    lemma = lemmatizer.lemmatize(token)
    print(f"{token} : {lemma}")

learned : learned
learning : learning
learn : learn
learns : learns
learner : learner
learners : learner


In [4]:
# A plural/inflected form plus two irregular comparatives.
likes_tokens = ["likes", "better", "worse"]

# Print each token next to its lemma.
# With no POS tag the lookup uses the default POS (noun), so "likes" becomes
# "like" but the comparatives are left untouched; lemmatize() would need
# pos="a" to treat them as adjectives.
for token in likes_tokens:
    lemma = lemmatizer.lemmatize(token)
    print(f"{token} : {lemma}")

likes : like
better : better
worse : worse
