In [1]:
import nltk

In [7]:
paragraph = """In a small, picturesque village nestled between towering mountains, the inhabitants led a tranquil life, relying on farming and weaving for their livelihood. Every morning, the farmers would head to the fields, tending to their crops with meticulous care, while the weavers sat by their looms, crafting intricate patterns into fabrics that were admired far and wide. Children played by the riverbank, their laughter echoing through the valley, as the elderly shared tales of their youth under the shade of ancient oak trees. The seasons shaped their routines, with spring bringing a burst of colors and activity, summer offering long days of work and warmth, autumn filling their granaries with the year's harvest, and winter inviting them to gather by the hearth, exchanging stories and songs. Despite the simplicity of their lives, the villagers felt a profound connection to the land and each other, finding joy and fulfillment in their everyday tasks and the rhythms of nature."""

In [8]:
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer

In [9]:
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

In [10]:
sentences = nltk.sent_tokenize(paragraph)

In [11]:
for sentence in sentences:
    print(sentence)

In a small, picturesque village nestled between towering mountains, the inhabitants led a tranquil life, relying on farming and weaving for their livelihood.
Every morning, the farmers would head to the fields, tending to their crops with meticulous care, while the weavers sat by their looms, crafting intricate patterns into fabrics that were admired far and wide.
Children played by the riverbank, their laughter echoing through the valley, as the elderly shared tales of their youth under the shade of ancient oak trees.
The seasons shaped their routines, with spring bringing a burst of colors and activity, summer offering long days of work and warmth, autumn filling their granaries with the year's harvest, and winter inviting them to gather by the hearth, exchanging stories and songs.
Despite the simplicity of their lives, the villagers felt a profound connection to the land and each other, finding joy and fulfillment in their everyday tasks and the rhythms of nature.


In [12]:
len(sentences)

5

In [15]:
corpus = []

In [20]:
for i in range (len(sentences)):
    review = re.sub('[^a-zA-Z]', ' ', sentences[i])
    review = review.lower()
    review = review.split()
    review = [lemmatizer.lemmatize(word) for word in review if not word in set(stopwords.words('english'))]
    review = ' '.join(review)
    corpus.append(review)

In [21]:
for corp in corpus:
    print(corp)

small picturesqu villag nestl tower mountain inhabit led tranquil life reli farm weav livelihood
everi morn farmer would head field tend crop meticul care weaver sat loom craft intric pattern fabric admir far wide
children play riverbank laughter echo valley elderli share tale youth shade ancient oak tree
season shape routin spring bring burst color activ summer offer long day work warmth autumn fill granari year harvest winter invit gather hearth exchang stori song
despit simplic live villag felt profound connect land find joy fulfil everyday task rhythm natur
small picturesque village nestled towering mountain inhabitant led tranquil life relying farming weaving livelihood
every morning farmer would head field tending crop meticulous care weaver sat loom crafting intricate pattern fabric admired far wide
child played riverbank laughter echoing valley elderly shared tale youth shade ancient oak tree
season shaped routine spring bringing burst color activity summer offering long day wo

In [22]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer()
X = cv.fit_transform(corpus).toarray()

In [24]:
print(X)

[[0 0 0 ... 0 0 0]
 [0 0 1 ... 1 0 0]
 [0 0 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 1]
 [0 1 0 ... 0 1 0]
 [0 0 0 ... 0 0 0]]


In [28]:
import pandas as pd

In [30]:
feature_names = cv.get_feature_names_out() 
df = pd.DataFrame(X, columns=feature_names, index=[f" {i+1}" for i in range(len(corpus))])
print(df)

    activ  activity  admir  admired  ancient  autumn  bring  bringing  burst  \
1       0         0      0        0        0       0      0         0      0   
2       0         0      1        0        0       0      0         0      0   
3       0         0      0        0        1       0      0         0      0   
4       1         0      0        0        0       1      1         0      1   
5       0         0      0        0        0       0      0         0      0   
6       0         0      0        0        0       0      0         0      0   
7       0         0      0        1        0       0      0         0      0   
8       0         0      0        0        1       0      0         0      0   
9       0         1      0        0        0       1      0         1      1   
10      0         0      0        0        0       0      0         0      0   

    care  ...  warmth  weav  weaver  weaving  wide  winter  work  would  year  \
1      0  ...       0     1       0   