In [2]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer


# Fetch the NLTK resources used below: tokenizer data ('punkt', 'punkt_tab'),
# the stop-word lists and the WordNet corpus used by the lemmatizer.
for _resource in ('punkt', 'stopwords', 'wordnet', 'punkt_tab'):
    nltk.download(_resource)

# Text that every preprocessing step below operates on.
sentence = (
    "One day, Leo, a young and boastful lion, challenged Ellie to a strength contest. "
    "But Ellie proposed a different challenge: to make the most animals smile in a day."
)

# Step 1 - tokenization: split the sentence into word and punctuation tokens.
tokens = word_tokenize(sentence)
print("Tokens:", tokens)

# Step 2 - stop-word removal: drop every token whose lower-cased form is in the
# English stop-word list. Punctuation tokens are not in that list, so they stay.
stop_words = set(stopwords.words('english'))
filtered_tokens = list(filter(lambda token: token.lower() not in stop_words, tokens))
print("Tokens after removing stop words:", filtered_tokens)

# Step 3 - stemming: reduce each remaining token with the Porter stemmer.
stemmer = PorterStemmer()
stemmed_tokens = list(map(stemmer.stem, filtered_tokens))
print("Stemmed tokens:", stemmed_tokens)

# Step 4 - lemmatization: look each remaining token up with the WordNet
# lemmatizer. No part-of-speech argument is passed, so verb forms such as
# 'challenged' come back unchanged (see the recorded output).
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = list(map(lemmatizer.lemmatize, filtered_tokens))
print("Lemmatized tokens:", lemmatized_tokens)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Tokens: ['One', 'day', ',', 'Leo', ',', 'a', 'young', 'and', 'boastful', 'lion', ',', 'challenged', 'Ellie', 'to', 'a', 'strength', 'contest', '.', 'But', 'Ellie', 'proposed', 'a', 'different', 'challenge', ':', 'to', 'make', 'the', 'most', 'animals', 'smile', 'in', 'a', 'day', '.']
Tokens after removing stop words: ['One', 'day', ',', 'Leo', ',', 'young', 'boastful', 'lion', ',', 'challenged', 'Ellie', 'strength', 'contest', '.', 'Ellie', 'proposed', 'different', 'challenge', ':', 'make', 'animals', 'smile', 'day', '.']
Stemmed tokens: ['one', 'day', ',', 'leo', ',', 'young', 'boast', 'lion', ',', 'challeng', 'elli', 'strength', 'contest', '.', 'elli', 'propos', 'differ', 'challeng', ':', 'make', 'anim', 'smile', 'day', '.']
Lemmatized tokens: ['One', 'day', ',', 'Leo', ',', 'young', 'boastful', 'lion', ',', 'challenged', 'Ellie', 'strength', 'contest', '.', 'Ellie', 'proposed', 'different', 'challenge', ':', 'make', 'animal', 'smile', 'day', '.']


In [3]:
from collections import Counter
from nltk import ngrams
import nltk

# Make sure the tokenizer data needed by nltk.word_tokenize is available.
# Recent NLTK releases load the tokenizer from 'punkt_tab' while older ones use
# 'punkt'; both are requested so this cell runs on a fresh environment without
# relying on any other cell having fetched the data first.
nltk.download('punkt')
nltk.download('punkt_tab')

# Sample text
text = "One day, Leo, a young and boastful lion, challenged Ellie to a strength contest. But Ellie proposed a different challenge: to make the most animals smile in a day."

# Tokenize the lower-cased text into words. Punctuation marks are kept as
# tokens, so they also take part in the n-gram counts below.
words = nltk.word_tokenize(text.lower())

# Unigrams: frequency of each individual token.
unigrams = Counter(words)
print("Unigrams:")
print(unigrams)

# Bigrams: frequency of each pair of adjacent tokens.
bigrams = Counter(ngrams(words, 2))
print("\nBigrams:")
print(bigrams)

# Trigrams: frequency of each run of three adjacent tokens.
trigrams = Counter(ngrams(words, 3))
print("\nTrigrams:")
print(trigrams)


Unigrams:
Counter({'a': 4, ',': 3, 'day': 2, 'ellie': 2, 'to': 2, '.': 2, 'one': 1, 'leo': 1, 'young': 1, 'and': 1, 'boastful': 1, 'lion': 1, 'challenged': 1, 'strength': 1, 'contest': 1, 'but': 1, 'proposed': 1, 'different': 1, 'challenge': 1, ':': 1, 'make': 1, 'the': 1, 'most': 1, 'animals': 1, 'smile': 1, 'in': 1})

Bigrams:
Counter({('one', 'day'): 1, ('day', ','): 1, (',', 'leo'): 1, ('leo', ','): 1, (',', 'a'): 1, ('a', 'young'): 1, ('young', 'and'): 1, ('and', 'boastful'): 1, ('boastful', 'lion'): 1, ('lion', ','): 1, (',', 'challenged'): 1, ('challenged', 'ellie'): 1, ('ellie', 'to'): 1, ('to', 'a'): 1, ('a', 'strength'): 1, ('strength', 'contest'): 1, ('contest', '.'): 1, ('.', 'but'): 1, ('but', 'ellie'): 1, ('ellie', 'proposed'): 1, ('proposed', 'a'): 1, ('a', 'different'): 1, ('different', 'challenge'): 1, ('challenge', ':'): 1, (':', 'to'): 1, ('to', 'make'): 1, ('make', 'the'): 1, ('the', 'most'): 1, ('most', 'animals'): 1, ('animals', 'smile'): 1, ('smile', 'in'): 1, ('

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
