In [1]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag, FreqDist

# Fetch the NLTK data packages this script relies on. Calling download() for
# a package that is already installed just reports it as up to date.
for resource in ('punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger'):
    nltk.download(resource)

# Input text: a six-sentence short story, one sentence per line.
story = """Once upon a time in a small village, there lived a curious cat named Whiskers.
Every morning, Whiskers explored the narrow lanes, chasing butterflies and peeking into gardens.
One sunny day, Whiskers discovered an old library hidden behind tall bushes.
Inside, dust-covered books whispered stories of ancient lands and brave heroes.
Excited, Whiskers spent hours jumping from shelf to shelf, knocking down books and unrolling scrolls.
From that day on, the little cat visited the hidden library daily, lost in tales of adventure."""

# 1. Tokenization
# word_tokens keeps punctuation marks as separate tokens; sentences holds the
# story split into individual sentence strings.
word_tokens = word_tokenize(story)
sentences = sent_tokenize(story)

# Show both views of the text, sentences first.
print("Sentences:\n", sentences)
print("\nWord Tokens:\n", word_tokens)

# 2. Remove stopwords
# Keep the English stopword list in a set for fast membership tests.
stop_words = set(stopwords.words('english'))


def _is_content_word(token):
    """Return True for a purely alphabetic token that is not an English stopword.

    The stopword comparison is case-insensitive; the token itself is left
    unchanged.
    """
    return token.lower() not in stop_words and token.isalpha()


# Original casing and order of the surviving tokens are preserved.
filtered_words = list(filter(_is_content_word, word_tokens))

print("\nFiltered Tokens (no stopwords):\n", filtered_words)

# 3. Stemming
# Run every filtered token through the Porter stemmer, keeping token order.
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, filtered_words))

print("\nStemmed Words:\n", stemmed_words)

# 4. Lemmatization
# WordNetLemmatizer.lemmatize() treats every word as a noun unless a POS is
# passed, so inflected verbs and adjectives (e.g. "lived", "explored") can come
# back unchanged. Tag the tokens first and hand the matching WordNet POS to
# the lemmatizer.
def _wordnet_pos(treebank_tag):
    """Map a Penn Treebank tag to the WordNet POS constant used by lemmatize().

    Tags starting with J, V or R map to adjective, verb and adverb
    respectively; every other tag falls back to noun, which is also the
    lemmatizer's own default.
    """
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    if treebank_tag.startswith('V'):
        return wordnet.VERB
    if treebank_tag.startswith('R'):
        return wordnet.ADV
    return wordnet.NOUN


lemmatizer = WordNetLemmatizer()
# Tag the complete token sequence so each word is tagged in its original
# context, then keep only the tokens that pass the same stopword/alphabetic
# filter used for filtered_words (so both lists stay aligned).
lemmatized_words = [
    lemmatizer.lemmatize(word.lower(), _wordnet_pos(tag))
    for word, tag in pos_tag(word_tokens)
    if word.lower() not in stop_words and word.isalpha()
]

print("\nLemmatized Words:\n", lemmatized_words)

# 5. POS tagging
# The tagger looks at neighbouring tokens when choosing a tag, so tagging a
# list from which stopwords and punctuation were already removed deprives it
# of that context. Tag the full token sequence first, then keep the
# (word, tag) pairs for the tokens that pass the stopword/alphabetic filter.
pos_tags = [
    (word, tag)
    for word, tag in pos_tag(word_tokens)
    if word.lower() not in stop_words and word.isalpha()
]
print("\nPart-of-Speech Tags:\n", pos_tags)

# 6. Frequency Distribution
# Count occurrences of each filtered token. Tokens keep their original
# casing, so counting is case-sensitive.
freq_dist = FreqDist(filtered_words)
print("\nFrequency Distribution:")
# most_common() with no argument yields every (token, count) pair, most
# frequent first.
for word, freq in freq_dist.most_common():
    print(word, freq, sep=": ")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sonam\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sonam\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sonam\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\sonam\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Sentences:
 ['Once upon a time in a small village, there lived a curious cat named Whiskers.', 'Every morning, Whiskers explored the narrow lanes, chasing butterflies and peeking into gardens.', 'One sunny day, Whiskers discovered an old library hidden behind tall bushes.', 'Inside, dust-covered books whispered stories of ancient lands and brave heroes.', 'Excited, Whiskers spent hours jumping from shelf to shelf, knocking down books and unrolling scrolls.', 'From that day on, the little cat visited the hidden library daily, lost in tales of adventure.']

Word Tokens:
 ['Once', 'upon', 'a', 'time', 'in', 'a', 'small', 'village', ',', 'there', 'lived', 'a', 'curious', 'cat', 'named', 'Whiskers', '.', 'Every', 'morning', ',', 'Whiskers', 'explored', 'the', 'narrow', 'lanes', ',', 'chasing', 'butterflies', 'and', 'peeking', 'into', 'gardens', '.', 'One', 'sunny', 'day', ',', 'Whiskers', 'discovered', 'an', 'old', 'library', 'hidden', 'behind', 'tall', 'bushes', '.', 'Inside', ',', 'dust-c