In [3]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from textblob import TextBlob
import spacy

nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
nlp = spacy.load("en_core_web_sm")

text = """I spent my vacation at home. During this time, I practiced coding and engaged in
creative activities such as painting and trekking. I followed my hobbies, relaxed,
and enjoyed my free time. On some days, I visited my aunt’s house, where there is a dog.
I spent my whole day with that dog, as I love animals very much. Being there made me
extremely happy. This is how I spent my vacation."""

tokens = word_tokenize(text)
print("\nTOKENS:")
print(tokens)

stemmer = PorterStemmer()
stems = [stemmer.stem(word) for word in tokens]
print("\nSTEMMING:")
print(stems)

lemmatizer = WordNetLemmatizer()
lemmas = [lemmatizer.lemmatize(word) for word in tokens]
print("\nLEMMATIZATION:")
print(lemmas)

pos_tags = nltk.pos_tag(tokens, lang='eng')
print("\nPOS TAGGING (NLTK):")
print(pos_tags)

print("\nPOS TAGGING (SPACY):")
doc = nlp(text)
for token in doc:
    print(f"{token.text} -> {token.pos_}")

blob = TextBlob(text)
print("\nSENTIMENT ANALYSIS:")
print("Polarity:", blob.sentiment.polarity)
print("Subjectivity:", blob.sentiment.subjectivity)

if blob.sentiment.polarity > 0:
    print("Overall Sentiment: Positive ")
elif blob.sentiment.polarity < 0:
    print("Overall Sentiment: Negative ")
else:
    print("Overall Sentiment: Neutral ")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.



TOKENS:
['I', 'spent', 'my', 'vacation', 'at', 'home', '.', 'During', 'this', 'time', ',', 'I', 'practiced', 'coding', 'and', 'engaged', 'in', 'creative', 'activities', 'such', 'as', 'painting', 'and', 'trekking', '.', 'I', 'followed', 'my', 'hobbies', ',', 'relaxed', ',', 'and', 'enjoyed', 'my', 'free', 'time', '.', 'On', 'some', 'days', ',', 'I', 'visited', 'my', 'aunt', '’', 's', 'house', ',', 'where', 'there', 'is', 'a', 'dog', '.', 'I', 'spent', 'my', 'whole', 'day', 'with', 'that', 'dog', ',', 'as', 'I', 'love', 'animals', 'very', 'much', '.', 'Being', 'there', 'made', 'me', 'extremely', 'happy', '.', 'This', 'is', 'how', 'I', 'spent', 'my', 'vacation', '.']

STEMMING:
['i', 'spent', 'my', 'vacat', 'at', 'home', '.', 'dure', 'thi', 'time', ',', 'i', 'practic', 'code', 'and', 'engag', 'in', 'creativ', 'activ', 'such', 'as', 'paint', 'and', 'trek', '.', 'i', 'follow', 'my', 'hobbi', ',', 'relax', ',', 'and', 'enjoy', 'my', 'free', 'time', '.', 'on', 'some', 'day', ',', 'i', 'visit

In [2]:
import nltk
from nltk.tokenize import WhitespaceTokenizer, WordPunctTokenizer
from nltk.tokenize import TreebankWordTokenizer, TweetTokenizer
from nltk.tokenize import MWETokenizer
from nltk.stem import PorterStemmer, SnowballStemmer
from nltk.stem import WordNetLemmatizer

nltk.download('punkt')
nltk.download('wordnet')

text = """I spent my vacation at home. During this time, I practiced coding and engaged in
creative activities such as painting and trekking. I followed my hobbies, relaxed,
and enjoyed my free time. On some days, I visited my aunt’s house, where there is a dog.
I spent my whole day with that dog, as I love animals very much. Being there made me
extremely happy. This is how I spent my vacation."""

# Whitespace Tokenization
wt = WhitespaceTokenizer()
print("\nWhitespace Tokenization:")
print(wt.tokenize(text))

# Punctuation-based Tokenization
pt = WordPunctTokenizer()
print("\nPunctuation-based Tokenization:")
print(pt.tokenize(text))

# Treebank Tokenization
tbt = TreebankWordTokenizer()
print("\nTreebank Tokenization:")
print(tbt.tokenize(text))

# Tweet Tokenization
tweet = TweetTokenizer()
print("\nTweet Tokenization:")
print(tweet.tokenize(text))

# MWE Tokenization
mwe = MWETokenizer([('creative', 'activities'), ('vacation', 'at', 'home')], separator='_')
print("\nMWE Tokenization:")
print(mwe.tokenize(text.split()))

# Stemming
porter = PorterStemmer()
snowball = SnowballStemmer("english")

tokens = tbt.tokenize(text)

print("\nPorter Stemming:")
print([porter.stem(word) for word in tokens])

print("\nSnowball Stemming:")
print([snowball.stem(word) for word in tokens])

# Lemmatization
lemmatizer = WordNetLemmatizer()
print("\nLemmatization:")
print([lemmatizer.lemmatize(word) for word in tokens])



Whitespace Tokenization:
['I', 'spent', 'my', 'vacation', 'at', 'home.', 'During', 'this', 'time,', 'I', 'practiced', 'coding', 'and', 'engaged', 'in', 'creative', 'activities', 'such', 'as', 'painting', 'and', 'trekking.', 'I', 'followed', 'my', 'hobbies,', 'relaxed,', 'and', 'enjoyed', 'my', 'free', 'time.', 'On', 'some', 'days,', 'I', 'visited', 'my', 'aunt’s', 'house,', 'where', 'there', 'is', 'a', 'dog.', 'I', 'spent', 'my', 'whole', 'day', 'with', 'that', 'dog,', 'as', 'I', 'love', 'animals', 'very', 'much.', 'Being', 'there', 'made', 'me', 'extremely', 'happy.', 'This', 'is', 'how', 'I', 'spent', 'my', 'vacation.']

Punctuation-based Tokenization:
['I', 'spent', 'my', 'vacation', 'at', 'home', '.', 'During', 'this', 'time', ',', 'I', 'practiced', 'coding', 'and', 'engaged', 'in', 'creative', 'activities', 'such', 'as', 'painting', 'and', 'trekking', '.', 'I', 'followed', 'my', 'hobbies', ',', 'relaxed', ',', 'and', 'enjoyed', 'my', 'free', 'time', '.', 'On', 'some', 'days', ','

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
