
This notebook demonstrates how to use NLTK's stopwords and stemming functionalities, and how to apply POS tagging to tokenized words.


In [1]:
# Import necessary libraries
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Download required resources
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sujal\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sujal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\sujal\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [2]:
# Initialize the stemmer
stemming = PorterStemmer()


In [3]:
# Define example paragraph
paragraph = """
Natural Language Processing with Python is interesting. It allows computers to understand human language.
Stemming helps reduce words to their base form. The weather today is nice and sunny.
"""

# Sentence tokenization
sentences = nltk.sent_tokenize(paragraph)


In [4]:
# Apply stopwords filtering, stemming, and POS tagging
for i in range(len(sentences)):
    words = nltk.word_tokenize(sentences[i])
    words = [word for word in words if word.lower() not in set(stopwords.words('english'))]
    stemmed_words = [stemming.stem(word) for word in words]
    postag = nltk.pos_tag(stemmed_words)
    print(f"Original sentence: {sentences[i]}")
    print(f"After removing stopwords and stemming: {stemmed_words}")
    print(f"POS Tags: {postag}")
    print("\n" + "-"*60 + "\n")


Original sentence: 
Natural Language Processing with Python is interesting.
After removing stopwords and stemming: ['natur', 'languag', 'process', 'python', 'interest', '.']
POS Tags: [('natur', 'JJ'), ('languag', 'NN'), ('process', 'NN'), ('python', 'NN'), ('interest', 'NN'), ('.', '.')]

------------------------------------------------------------

Original sentence: It allows computers to understand human language.
After removing stopwords and stemming: ['allow', 'comput', 'understand', 'human', 'languag', '.']
POS Tags: [('allow', 'VB'), ('comput', 'NN'), ('understand', 'JJ'), ('human', 'JJ'), ('languag', 'NN'), ('.', '.')]

------------------------------------------------------------

Original sentence: Stemming helps reduce words to their base form.
After removing stopwords and stemming: ['stem', 'help', 'reduc', 'word', 'base', 'form', '.']
POS Tags: [('stem', 'NN'), ('help', 'NN'), ('reduc', 'VB'), ('word', 'NN'), ('base', 'NN'), ('form', 'NN'), ('.', '.')]

-------------------