In [64]:
import nltk

In [65]:
from nltk import sent_tokenize, word_tokenize

In [66]:
text = 'The sun set behind the distant mountains, casting a warm glow across the tranquil lake. As night fell, the stars emerged, painting the sky with their shimmering brilliance.'

In [67]:
text

'The sun set behind the distant mountains, casting a warm glow across the tranquil lake. As night fell, the stars emerged, painting the sky with their shimmering brilliance.'

In [68]:
tokenize_sent = sent_tokenize(text)

In [69]:
tokenize_sent

['The sun set behind the distant mountains, casting a warm glow across the tranquil lake.',
 'As night fell, the stars emerged, painting the sky with their shimmering brilliance.']

In [70]:
tokenize_word= word_tokenize(text)

In [71]:
len(tokenize_word)

33

In [72]:
tokenize_word

['The',
 'sun',
 'set',
 'behind',
 'the',
 'distant',
 'mountains',
 ',',
 'casting',
 'a',
 'warm',
 'glow',
 'across',
 'the',
 'tranquil',
 'lake',
 '.',
 'As',
 'night',
 'fell',
 ',',
 'the',
 'stars',
 'emerged',
 ',',
 'painting',
 'the',
 'sky',
 'with',
 'their',
 'shimmering',
 'brilliance',
 '.']

In [73]:
from nltk.stem import PorterStemmer
stem=[]
for i in tokenize_word:
    porter = PorterStemmer()
    stem_word = porter.stem(i)
    stem.append(stem_word)
print(stem)

['the', 'sun', 'set', 'behind', 'the', 'distant', 'mountain', ',', 'cast', 'a', 'warm', 'glow', 'across', 'the', 'tranquil', 'lake', '.', 'as', 'night', 'fell', ',', 'the', 'star', 'emerg', ',', 'paint', 'the', 'sky', 'with', 'their', 'shimmer', 'brillianc', '.']


In [74]:
porter.stem('was')

'wa'

In [75]:
from nltk.stem import WordNetLemmatizer
lem_list = []
for i in tokenize_word:
    lemmatizer = WordNetLemmatizer()
    lem_word = lemmatizer.lemmatize(i)
    lem_list.append(lem_word)
print(lem_list)

['The', 'sun', 'set', 'behind', 'the', 'distant', 'mountain', ',', 'casting', 'a', 'warm', 'glow', 'across', 'the', 'tranquil', 'lake', '.', 'As', 'night', 'fell', ',', 'the', 'star', 'emerged', ',', 'painting', 'the', 'sky', 'with', 'their', 'shimmering', 'brilliance', '.']


In [76]:
from nltk import pos_tag

In [77]:
pos_list = nltk.pos_tag(lem_list)

In [78]:
print(pos_list)

[('The', 'DT'), ('sun', 'NN'), ('set', 'VBN'), ('behind', 'IN'), ('the', 'DT'), ('distant', 'JJ'), ('mountain', 'NN'), (',', ','), ('casting', 'VBG'), ('a', 'DT'), ('warm', 'JJ'), ('glow', 'NN'), ('across', 'IN'), ('the', 'DT'), ('tranquil', 'NN'), ('lake', 'NN'), ('.', '.'), ('As', 'IN'), ('night', 'NN'), ('fell', 'VBD'), (',', ','), ('the', 'DT'), ('star', 'NN'), ('emerged', 'VBD'), (',', ','), ('painting', 'VBG'), ('the', 'DT'), ('sky', 'NN'), ('with', 'IN'), ('their', 'PRP$'), ('shimmering', 'NN'), ('brilliance', 'NN'), ('.', '.')]


In [79]:
#nltk.help.brown_tagset()

In [80]:
from nltk.corpus import stopwords
stop_words = stopwords.words('english')
filtered_words = [w for w in tokenize_word if not w.lower() in stop_words]

In [81]:
filtered_words

['sun',
 'set',
 'behind',
 'distant',
 'mountains',
 ',',
 'casting',
 'warm',
 'glow',
 'across',
 'tranquil',
 'lake',
 '.',
 'night',
 'fell',
 ',',
 'stars',
 'emerged',
 ',',
 'painting',
 'sky',
 'shimmering',
 'brilliance',
 '.']

In [82]:
new_text = ' '.join(filtered_words)

In [83]:
new_text

'sun set behind distant mountains , casting warm glow across tranquil lake . night fell , stars emerged , painting sky shimmering brilliance .'

In [84]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [85]:
tfidf = TfidfVectorizer()
result = tfidf.fit_transform(tokenize_word)

In [86]:
print(tfidf.vocabulary_)

{'the': 18, 'sun': 17, 'set': 13, 'behind': 2, 'distant': 5, 'mountains': 10, 'casting': 4, 'warm': 21, 'glow': 8, 'across': 0, 'tranquil': 20, 'lake': 9, 'as': 1, 'night': 11, 'fell': 7, 'stars': 16, 'emerged': 6, 'painting': 12, 'sky': 15, 'with': 22, 'their': 19, 'shimmering': 14, 'brilliance': 3}


In [89]:
print("\nidf values : ")
for ele1, ele2 in zip(tfidf.get_feature_names_out(), tfidf.idf_):
    print(ele1, ":", ele2)


idf values : 
across : 3.833213344056216
as : 3.833213344056216
behind : 3.833213344056216
brilliance : 3.833213344056216
casting : 3.833213344056216
distant : 3.833213344056216
emerged : 3.833213344056216
fell : 3.833213344056216
glow : 3.833213344056216
lake : 3.833213344056216
mountains : 3.833213344056216
night : 3.833213344056216
painting : 3.833213344056216
set : 3.833213344056216
shimmering : 3.833213344056216
sky : 3.833213344056216
stars : 3.833213344056216
sun : 3.833213344056216
the : 2.734601055388106
their : 3.833213344056216
tranquil : 3.833213344056216
warm : 3.833213344056216
with : 3.833213344056216
