In [1]:
import nltk
from nltk.stem import PorterStemmer, SnowballStemmer, LancasterStemmer, WordNetLemmatizer
import spacy

# Download necessary resources (only needed once).
# Re-running this cell is harmless: a package that is already installed is
# simply reported as up-to-date in the download log.
nltk.download('wordnet')
# This call is the last expression of the cell, so its return value is what
# the notebook displays underneath the download log.
nltk.download('omw-1.4') # Open Multilingual Wordnet (for WordNetLemmatizer)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\messa\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\messa\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [3]:
# Load spaCy model (you may need to download it first).
# An OSError from spacy.load is treated as "model not installed": the model is
# downloaded and the load is retried exactly once.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading en_core_web_sm model...")
    # NOTE(review): relies on `spacy.cli` being reachable after a plain
    # `import spacy` -- confirm for the installed spaCy version.
    spacy.cli.download("en_core_web_sm")
    # No further fallback: if this second load fails, the error propagates.
    nlp = spacy.load("en_core_web_sm")

In [4]:
# Sample vocabulary for the stemming comparison below.
words = [
    "running", "easily", "flies", "better",
    "dogs", "universities", "releasing", "released",
]

# --- NLTK Stemmers ---
# One instance per algorithm, created in the same order as the table columns.
porter = PorterStemmer()
snowball = SnowballStemmer("english")
lancaster = LancasterStemmer()

# Row layout: four left-aligned cells, each padded to 15 characters and
# separated by a single space.
stem_row = "{:<15} {:<15} {:<15} {:<15}"

print("Stemming Results:")
print(stem_row.format("Word", "Porter", "Snowball", "Lancaster"))
for word in words:
    print(stem_row.format(
        word,
        porter.stem(word),
        snowball.stem(word),
        lancaster.stem(word),
    ))

Stemming Results:
Word            Porter          Snowball        Lancaster      
running         run             run             run            
easily          easili          easili          easy           
flies           fli             fli             fli            
better          better          better          bet            
dogs            dog             dog             dog            
universities    univers         univers         univers        
releasing       releas          releas          releas         
released        releas          releas          releas         


In [5]:
# --- NLTK Lemmatizer ---
lemmatizer = WordNetLemmatizer()

# First table: lemmatize() is called without a POS argument, so the
# lemmatizer's default part of speech applies (noun, per the heading).
print("\nNLTK Lemmatization Results (default POS - noun):")
print("Word".ljust(15), "Lemma".ljust(15))
for word in words:
    print(word.ljust(15), lemmatizer.lemmatize(word).ljust(15))

# Second table: the same words lemmatized explicitly as a verb (pos='v') and
# as an adjective (pos='a'), shown side by side.
print("\nNLTK Lemmatization Results (with POS tags):")
print("Word".ljust(15), "Lemma (Verb)".ljust(15), "Lemma (Adjective)".ljust(15))
for word in words:
    as_verb = lemmatizer.lemmatize(word, pos='v')
    as_adjective = lemmatizer.lemmatize(word, pos='a')
    print(word.ljust(15), as_verb.ljust(15), as_adjective.ljust(15))


NLTK Lemmatization Results (default POS - noun):
Word            Lemma          
running         running        
easily          easily         
flies           fly            
better          better         
dogs            dog            
universities    university     
releasing       releasing      
released        released       

NLTK Lemmatization Results (with POS tags):
Word            Lemma (Verb)    Lemma (Adjective)
running         run             running        
easily          easily          easily         
flies           fly             flies          
better          better          good           
dogs            dog             dogs           
universities    universities    universities   
releasing       release         releasing      
released        release         released       


In [6]:
# --- spaCy Lemmatization ---
print("\nspaCy Lemmatization Results:")
print("{:<15} {:<15}".format("Word", "Lemma"))

# The words are joined into one space-separated string and run through the
# pipeline as a single document; the table then has one row per token that
# the pipeline produced.
doc = nlp(" ".join(words))
for token in doc:
    print("{:<15} {:<15}".format(token.text, token.lemma_))


spaCy Lemmatization Results:
Word            Lemma          
running         run            
easily          easily         
flies           fly            
better          well           
dogs            dog            
universities    university     
releasing       release        
released        release        
