# Importing necessary libraries


In [1]:
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import wordnet
from nltk import pos_tag, word_tokenize


# Downloading necessary NLTK resources


In [2]:
import nltk

# Data packages used by the cells below:
#   wordnet                                   -> WordNetLemmatizer
#   punkt / punkt_tab                         -> word_tokenize
#   averaged_perceptron_tagger(_eng)          -> pos_tag
# Recent NLTK releases look the tokenizer and tagger up under the
# "punkt_tab" and "averaged_perceptron_tagger_eng" names and raise
# LookupError when only the legacy packages are installed, so both the
# legacy and the current package names are requested here.
# The last call is left as a bare expression so the cell still displays
# the downloader's return value.
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

# Sample text

In [4]:
# Two short sentences; adjacent string literals are joined into one string.
text = (
    "The leaves on the trees are falling. "
    "The children are playing with leaves in the park."
)


# Tokenizing the text into words


In [5]:
# Break the sample text into a list of tokens. As the printed results further
# down show, punctuation such as "." ends up as its own token.
words = word_tokenize(text)


# 1. Stemming


In [6]:
stemmer = PorterStemmer()
# Apply the stemmer's bound `stem` method to every token, keeping token order.
stemmed_words = list(map(stemmer.stem, words))
print("Stemmed Words: ", stemmed_words)

Stemmed Words:  ['the', 'leav', 'on', 'the', 'tree', 'are', 'fall', '.', 'the', 'children', 'are', 'play', 'with', 'leav', 'in', 'the', 'park', '.']


# 2. Lemmatization


In [7]:
# Lemmatizer instance used by the lemmatization cell below.
lemmatizer = WordNetLemmatizer()

# Function to get part of speech tags compatible with WordNet


In [8]:
def get_wordnet_pos(word):
    """Return the WordNet part-of-speech constant for a single word.

    The word is passed to ``pos_tag`` as a one-element list and only the
    first character of the resulting tag (upper-cased) is inspected:
    ``J`` -> ``wordnet.ADJ``, ``N`` -> ``wordnet.NOUN``,
    ``V`` -> ``wordnet.VERB``, ``R`` -> ``wordnet.ADV``.
    Any other initial falls back to ``wordnet.NOUN``.

    NOTE(review): because the word is tagged on its own, the tagger sees no
    surrounding sentence context.
    """
    penn_tag = pos_tag([word])[0][1]
    initial = penn_tag[0].upper()

    if initial == "J":
        return wordnet.ADJ
    if initial == "V":
        return wordnet.VERB
    if initial == "R":
        return wordnet.ADV
    # "N" and every unrecognised initial both resolve to NOUN.
    return wordnet.NOUN

In [9]:
# Pair every token with the WordNet POS picked by get_wordnet_pos and
# lemmatize it; map() walks both sequences in step, one token at a time.
lemmatized_words = list(
    map(lemmatizer.lemmatize, words, map(get_wordnet_pos, words))
)
print("Lemmatized Words: ", lemmatized_words)

Lemmatized Words:  ['The', 'leaf', 'on', 'the', 'tree', 'be', 'fall', '.', 'The', 'child', 'be', 'play', 'with', 'leaf', 'in', 'the', 'park', '.']


# Compare original, stemmed, and lemmatized words


In [10]:
# Line up the i-th original token with its stem and its lemma. zip() stops at
# the shortest input; the stemmed and lemmatized lists are each built with one
# entry per element of `words`.
comparison = list(zip(words, stemmed_words, lemmatized_words))
# The leading "\n" prints a blank line before the table header.
print("\nComparison (Original, Stemmed, Lemmatized):")
for original, stemmed, lemmatized in comparison:
    # ":15" pads each column to a minimum width of 15 so the arrows line up.
    print(f"{original:15} -> {stemmed:15} -> {lemmatized:15}")



Comparison (Original, Stemmed, Lemmatized):
The             -> the             -> The            
leaves          -> leav            -> leaf           
on              -> on              -> on             
the             -> the             -> the            
trees           -> tree            -> tree           
are             -> are             -> be             
falling         -> fall            -> fall           
.               -> .               -> .              
The             -> the             -> The            
children        -> children        -> child          
are             -> are             -> be             
playing         -> play            -> play           
with            -> with            -> with           
leaves          -> leav            -> leaf           
in              -> in              -> in             
the             -> the             -> the            
park            -> park            -> park           
.               -> .               -> .              

# Import PorterStemmer


In [11]:
from nltk.stem import PorterStemmer


# List of words to stem


In [14]:
# NOTE: this rebinds `words`, replacing the token list produced by
# word_tokenize earlier in the notebook. Re-running the earlier stemming or
# lemmatization cells after this one would operate on these four words.
words = ["running", "jumps", "easily", "flying"]


# Initialize the stemmer

In [15]:
# New PorterStemmer for this exercise; rebinds the `stemmer` name that the
# first stemming cell already created.
stemmer = PorterStemmer()


# Stem each word and print the result


In [17]:
# Run every word through the Porter stemmer, preserving the input order.
stemmed_words = list(map(stemmer.stem, words))

print("Stemmed Words:", stemmed_words)


Stemmed Words: ['run', 'jump', 'easili', 'fli']


# Import WordNetLemmatizer and other necessary modules


In [18]:
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk import pos_tag


# List of words to lemmatize


In [19]:
# Same four words as in the stemming exercise above; the assignment is
# repeated here, so `words` is rebound to an equal list.
words = ["running", "jumps", "easily", "flying"]



# Initialize the lemmatizer


In [20]:
# Rebinds `lemmatizer` to a new WordNetLemmatizer; one was already created in
# the first lemmatization section of this notebook.
lemmatizer = WordNetLemmatizer()


# Helper function to get WordNet PoS tag


In [21]:
def get_wordnet_pos(word):
    """Return the WordNet part-of-speech constant for a single word.

    This re-defines the helper of the same name from earlier in the
    notebook with the same behavior. The word is passed to ``pos_tag`` as a
    one-element list and only the first character of the resulting tag
    (upper-cased) is inspected: ``J`` -> ``wordnet.ADJ``,
    ``N`` -> ``wordnet.NOUN``, ``V`` -> ``wordnet.VERB``,
    ``R`` -> ``wordnet.ADV``. Any other initial falls back to
    ``wordnet.NOUN``.

    NOTE(review): because the word is tagged on its own, the tagger sees no
    surrounding sentence context.
    """
    penn_tag = pos_tag([word])[0][1]
    initial = penn_tag[0].upper()

    if initial == "J":
        return wordnet.ADJ
    if initial == "V":
        return wordnet.VERB
    if initial == "R":
        return wordnet.ADV
    # "N" and every unrecognised initial both resolve to NOUN.
    return wordnet.NOUN

# Lemmatize each word with PoS tag and print the result


In [22]:
# Lemmatize each word using the WordNet POS that get_wordnet_pos selects for
# it; map() consumes the word list and the POS stream in lock-step.
lemmatized_words = list(
    map(lemmatizer.lemmatize, words, map(get_wordnet_pos, words))
)

print("Lemmatized Words:", lemmatized_words)


Lemmatized Words: ['run', 'jump', 'easily', 'fly']
