In [6]:
## Stemmer Example

# Shared sample vocabulary for the stemming demos. Each row groups the
# inflected / derived forms of one base word so the stemmers' behaviour on
# related forms can be compared side by side.
words = [
    "history", "historical", "historically",
    "sit", "sits", "sat", "sitting",
    "write", "writes", "wrote", "writing", "written",
    "running", "run", "runs", "ran",
    "easily",
    "fair", "fairness", "fairly",
    "happily", "happier", "happiest", "happiness",
]

# Echo the untouched input, one word per line, as a baseline for comparison.
print("Words before stemming:")
print("\n".join(words))

Words before stemming:
history
historical
historically
sit
sits
sat
sitting
write
writes
wrote
writing
written
running
run
runs
ran
easily
fair
fairness
fairly
happily
happier
happiest
happiness


In [8]:
"""
PorterStemmer is a popular stemming algorithm in Natural Language Processing (NLP) that reduces words to their root/base form by removing suffixes.

Key Features:
Purpose: Converts words to their stem by stripping common suffixes
Algorithm: Based on Martin Porter's 1980 stemming algorithm
Language: Primarily designed for English
Approach: Rule-based suffix removal
"""
from nltk.stem import PorterStemmer

# Porter stemmer instance used for the comparison below.
stemming = PorterStemmer()

# Print one "word -> stem" line per sample word, in list order.
# In the recorded run the stems are not always dictionary words
# (e.g. "history" -> "histori"), and irregular forms such as "sat",
# "wrote" and "ran" come back unchanged.
for word, stem in zip(words, map(stemming.stem, words)):
    print(f"{word} -> {stem}")


history -> histori
historical -> histor
historically -> histor
sit -> sit
sits -> sit
sat -> sat
sitting -> sit
write -> write
writes -> write
wrote -> wrote
writing -> write
written -> written
running -> run
run -> run
runs -> run
ran -> ran
easily -> easili
fair -> fair
fairness -> fair
fairly -> fairli
happily -> happili
happier -> happier
happiest -> happiest
happiness -> happi


In [10]:
"""
RegexpStemmer is a customizable stemming algorithm in NLTK that uses regular expressions to remove suffixes from words.

Key Features:
Approach: Rule-based using regular expressions
Customizable: You define your own patterns to remove
Simple: Basic suffix removal based on regex patterns
Language-agnostic: Can work with any language if you provide the right patterns
"""
from nltk.stem import RegexpStemmer

# Suffixes to strip, written as a single alternation in which every
# alternative is anchored to the end of the word with `$`.
#
# * Each suffix is listed exactly once; repeating an alternative (such as a
#   second `ies$`) cannot change what the pattern matches.
# * The regex engine takes the leftmost match, so a longer suffix beats a
#   shorter one that is its tail: `ss$` wins over `s$`, which is why
#   "fairness" becomes "fairne" in the recorded output.
# * min=4 is NLTK's minimum word length for stemming: words shorter than
#   four characters are returned unchanged.
stemmer = RegexpStemmer('ing$|ly$|ed$|ies$|ied$|s$|ss$', min=4)

# Print one "word -> stem" line per sample word, in list order.
print("RegexStemmer Results:")
for word in words:
    stemmed = stemmer.stem(word)
    print(f"{word} -> {stemmed}")


RegexStemmer Results:
history -> history
historical -> historical
historically -> historical
sit -> sit
sits -> sit
sat -> sat
sitting -> sitt
write -> write
writes -> write
wrote -> wrote
writing -> writ
written -> written
running -> runn
run -> run
runs -> run
ran -> ran
easily -> easi
fair -> fair
fairness -> fairne
fairly -> fair
happily -> happi
happier -> happier
happiest -> happiest
happiness -> happine


In [12]:
"""
SnowballStemmer is an improved and more advanced stemming algorithm developed by Martin Porter as a successor to the original PorterStemmer.

Key Features:
Multi-language support: Works with 15+ languages (English, French, German, Spanish, etc.)
Improved accuracy: Better than PorterStemmer with fewer over-stemming issues
Consistent: More predictable results
Framework: Part of the Snowball stemming framework
"""

from nltk.stem.snowball import SnowballStemmer
# English Snowball stemmer; rebinds `stemmer` for this demo.
stemmer = SnowballStemmer("english")

# Stem the whole vocabulary first, then print one "word -> stem" line per
# word in list order. In the recorded output the only line that differs
# from the Porter run is "fairly", which becomes "fair" here.
snowball_stems = [stemmer.stem(word) for word in words]
for word, stemmed in zip(words, snowball_stems):
    print(f"{word} -> {stemmed}")
    

history -> histori
historical -> histor
historically -> histor
sit -> sit
sits -> sit
sat -> sat
sitting -> sit
write -> write
writes -> write
wrote -> wrote
writing -> write
written -> written
running -> run
run -> run
runs -> run
ran -> ran
easily -> easili
fair -> fair
fairness -> fair
fairly -> fair
happily -> happili
happier -> happier
happiest -> happiest
happiness -> happi
