In [14]:
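# Stemming is the process of reducing a word to its base or root form (its "stem").
# The stem is not guaranteed to be a real word: the Porter stemmer below maps "goes" to "goe" and "dances" to "danc".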
# This is useful in natural language processing tasks where you want to treat different forms of a word as the same.
# Stemming can be done using various algorithms, such as the Porter stemming algorithm or the Snowball stemming algorithm.

In [15]:
# Sample vocabulary: four forms each of the verbs go, eat, write and dance,
# including irregular forms such as "went", "ate" and "wrote".
words = [
    "goes", "going", "gone", "went",
    "eats", "eating", "eaten", "ate",
    "writes", "writing", "written", "wrote",
    "dances", "dancing", "danced", "dance",
]

In [16]:
from nltk.stem import PorterStemmer

In [17]:
# Create the Porter stemmer once; the same `stemming` object is reused further down.
stemming = PorterStemmer()

# Print every sample word next to the stem Porter produces for it, one pair per line.
for word in words:
    stem = stemming.stem(word)
    print(f"{word} -> {stem}")

goes -> goe
going -> go
gone -> gone
went -> went
eats -> eat
eating -> eat
eaten -> eaten
ate -> ate
writes -> write
writing -> write
written -> written
wrote -> wrote
dances -> danc
dancing -> danc
danced -> danc
dance -> danc


In [18]:
## RegexpStemmer Class
# The `RegexpStemmer` class in NLTK allows you to define custom stemming rules using regular expressions. 
# This can be useful when you want to create a stemmer that follows specific patterns or rules for your dataset.

In [19]:
from nltk.stem import RegexpStemmer

In [20]:
# Custom rule set: remove a trailing "ing", "ed", "es" or "s".
# `min=4` is the minimum word length for the rule to be applied.
reg_stemmer = RegexpStemmer(
    r"ing$|ed$|es$|s$",
    min=4,
)

In [21]:
# Quick check: a word ending in "ing" loses that suffix.
reg_stemmer.stem("eating")

'eat'

In [22]:
# Regular expressions can be used to define complex patterns for stemming.
# For example, you can create a stemmer that removes specific suffixes or prefixes from words.
# This allows for more flexibility in how words are stemmed compared to traditional stemming algorithms.
# The `min` parameter sets the minimum word length for stemming to apply: words shorter than `min` characters are returned unchanged.

In [23]:
## Snowball Stemmer
# The Snowball stemmer is a more advanced stemming algorithm that supports multiple languages.
# It can handle a wide range of languages and can be customized for specific needs.
# The Snowball stemmer is implemented in NLTK using the SnowballStemmer class.


In [24]:
from nltk.stem import SnowballStemmer
# Build a Snowball stemmer configured for English; the language is chosen by name.
snowball_stemmer = SnowballStemmer(language="english")

In [25]:
# Pair each sample word with its Snowball stem and print one "word -> stem" line per pair.
for word, stem in zip(words, map(snowball_stemmer.stem, words)):
    print(f"{word} -> {stem}")

goes -> goe
going -> go
gone -> gone
went -> went
eats -> eat
eating -> eat
eaten -> eaten
ate -> ate
writes -> write
writing -> write
written -> written
wrote -> wrote
dances -> danc
dancing -> danc
danced -> danc
dance -> danc


In [26]:
# Porter stems for a handful of "-ly" / "-ness" derivations, displayed as a tuple.
tuple(map(stemming.stem, ("fairly", "sportingly", "unfairly", "unfairness")))

('fairli', 'sportingli', 'unfairli', 'unfair')

In [28]:
# Snowball stems for three "-ly" adverbs plus "goes", displayed as a tuple.
tuple(
    snowball_stemmer.stem(term)
    for term in ("fairly", "sportingly", "unfairly", "goes")
)

('fair', 'sport', 'unfair', 'goe')

In [None]:
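# The Snowball stemmer is more sophisticated than the Porter stemmer and can handle a wider range of words and languages.
# For example, it reduces "fairly" to "fair" and "sportingly" to "sport", where the Porter stemmer above returns "fairli" and "sportingli".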
# It is particularly useful for applications that require high accuracy in stemming.
# The Snowball stemmer can be used in various natural language processing tasks, such as text classification, information retrieval, and sentiment analysis.
# It is also available in other programming languages, making it a versatile choice for stemming tasks.