In [19]:
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import stopwords

In [20]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to /root/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!


True

In [21]:
para = """You look up at the beautiful sky and see all the stars twinkling away. And the luminous moon with its dull glow. But did you know that stars don’t really twinkle, it’s actually an illusion? And the moon does not emit any light of its own, it only reflects light from the sun. Let us explore our galaxy and learn other such captivating facts about the moon and stars"""

In [22]:
# Tokenize sentences and words
sentences = nltk.sent_tokenize(para)
words = nltk.word_tokenize(para)

In [23]:
# Initialize PorterStemmer and WordNetLemmatizer
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

In [24]:
# Get English stopwords
stop_words = set(stopwords.words('english'))

In [25]:
# Stem words
stemmed_sentences = []
for sentence in sentences:
    words = nltk.word_tokenize(sentence)
    stemmed_words = [stemmer.stem(word) for word in words if word.lower() not in stop_words]
    stemmed_sentences.append(' '.join(stemmed_words))

In [26]:
# Lemmatize words
lemmatized_sentences = []
for sentence in sentences:
    words = nltk.word_tokenize(sentence)
    lemmatized_words = [lemmatizer.lemmatize(word) for word in words if word.lower() not in stop_words]
    lemmatized_sentences.append(' '.join(lemmatized_words))

In [27]:
# Part-of-speech tagging
pos_tags = nltk.pos_tag(nltk.word_tokenize(para))

In [28]:
# Display help for POS tag 'VBG'
nltk.help.upenn_tagset('VBG')

VBG: verb, present participle or gerund
    telegraphing stirring focusing angering judging stalling lactating
    hankerin' alleging veering capping approaching traveling besieging
    encrypting interrupting erasing wincing ...


In [29]:
# Print plural nouns (NNS)
plural_nouns = [word for word, pos in pos_tags if pos == 'NNS']
print("Plural Nouns:", plural_nouns)

Plural Nouns: ['stars', 'stars', 'facts', 'stars']


In [30]:
# Display help for POS tag 'VBG'
nltk.help.upenn_tagset('VBG')

VBG: verb, present participle or gerund
    telegraphing stirring focusing angering judging stalling lactating
    hankerin' alleging veering capping approaching traveling besieging
    encrypting interrupting erasing wincing ...


In [31]:
# Print results
print("\nStemmed Sentences:\n", stemmed_sentences)
print("\nLemmatized Sentences:\n", lemmatized_sentences)


Stemmed Sentences:
 ['look beauti sky see star twinkl away .', 'lumin moon dull glow .', 'know star ’ realli twinkl , ’ actual illus ?', 'moon emit light , reflect light sun .', 'let us explor galaxi learn captiv fact moon star']

Lemmatized Sentences:
 ['look beautiful sky see star twinkling away .', 'luminous moon dull glow .', 'know star ’ really twinkle , ’ actually illusion ?', 'moon emit light , reflects light sun .', 'Let u explore galaxy learn captivating fact moon star']
