In [1]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [9]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [2]:
def preprocess(text):
    tokens = word_tokenize(text.lower())  # Convert to lowercase for consistency

    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]

    return tokens

In [3]:
def vectorize(tokens, text):
    # Vectorization
    vectorizer = CountVectorizer()
    token_matrix = vectorizer.fit_transform([" ".join(tokens), text])  # Create a matrix of token counts
    return token_matrix

In [4]:
def summarize(text, top_n=2):
    sentences = sent_tokenize(text)
    tokens = preprocess(text)

    token_matrix = vectorize(tokens, text)

    similarity = cosine_similarity(token_matrix)[0]  # Calculate similarity between the tokenized text and each sentence
    print("Vector Similarity Scores:")
    for i, score in enumerate(similarity):
        print(f"Sentence {i+1}: {score}")

    top_indices = similarity.argsort()[-top_n:][::-1]  # Get indices of most similar sentences
    summary = [sentences[i] for i in top_indices]  # Extract most similar sentences

    return ' '.join(summary)

In [11]:
text = input("Enter your text:")


Enter your text:In the heart of a small town, there was an old, forgotten library. The building was grand, with towering bookshelves that seemed to touch the sky and windows that let in rays of golden sunlight. But over the years, people had stopped visiting. The books collected dust, and the once vibrant atmosphere had turned silent.  Lena, a young girl with a curious mind, stumbled upon this library one rainy afternoon. She pushed open the creaky door and stepped inside, her eyes widening in awe. Rows and rows of books greeted her, each one holding secrets waiting to be discovered.


In [12]:
summary = summarize(text)
print("\nSummary:")
print(summary)

Vector Similarity Scores:
Sentence 1: 0.9999999999999993
Sentence 2: 0.602193791596494

Summary:
In the heart of a small town, there was an old, forgotten library. The building was grand, with towering bookshelves that seemed to touch the sky and windows that let in rays of golden sunlight.
