<a href="https://colab.research.google.com/github/mahendrasawant26/text-summarizer-nlp/blob/main/text-summarize.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.tokenize.treebank import TreebankWordDetokenizer
import string

# Fetch the NLTK data used by the tokenizers and the stopword filter below.
# Recent NLTK releases load the sentence/word tokenizer models from the
# 'punkt_tab' package rather than the pickled 'punkt' one, so both are
# requested to keep the cell working across NLTK versions. Packages that
# are already present are reported as up-to-date rather than fetched again.
for _resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)

def preprocess_text(text):
    """Return the lowercased content words of ``text``.

    The text is split with ``word_tokenize``. Tokens that are not purely
    alphanumeric (per ``str.isalnum``) are discarded, the remaining tokens
    are lowercased, and English stopwords are then filtered out.
    """
    english_stopwords = set(stopwords.words('english'))
    kept = []
    for token in word_tokenize(text):
        # Punctuation and mixed tokens (e.g. "'s", hyphenated words) fail isalnum().
        if not token.isalnum():
            continue
        lowered = token.lower()
        if lowered not in english_stopwords:
            kept.append(lowered)
    return kept

def calculate_word_frequencies(words):
    """Count how often each word occurs in ``words``.

    Returns the ``FreqDist`` built directly from the given word sequence.
    """
    return FreqDist(words)

def calculate_sentence_scores(sentences, word_freq, max_sentence_words=30):
    """Score each sentence by the summed frequency of the words it contains.

    Args:
        sentences: Iterable of sentence strings.
        word_freq: Mapping from lowercase word to its frequency, e.g. the
            result of ``calculate_word_frequencies``.
        max_sentence_words: Sentences whose space-separated piece count
            (``len(sentence.split(' '))``) is this value or more are
            skipped. Defaults to 30.

    Returns:
        A dict mapping sentence -> score. Sentences that are too long, or
        that contain no token present in ``word_freq``, are omitted.
        Identical sentence strings share one entry, so their scores add up.
    """
    sentence_scores = {}
    for sentence in sentences:
        # The length limit depends only on the sentence, so check it once
        # up front instead of once per token, and avoid tokenizing
        # sentences that would be discarded anyway.
        if len(sentence.split(' ')) >= max_sentence_words:
            continue
        for word in nltk.word_tokenize(sentence.lower()):
            if word in word_freq:
                sentence_scores[sentence] = (
                    sentence_scores.get(sentence, 0) + word_freq[word]
                )
    return sentence_scores

def summarize(text, num_sentences=3):
    """Build an extractive summary from the highest-scoring sentences.

    Args:
        text: The document to summarize.
        num_sentences: Maximum number of sentences to keep. Defaults to 3;
            values below zero are treated as zero.

    Returns:
        The selected sentences joined by single spaces, highest score
        first (not document order). An empty string is returned when no
        sentence receives a score.
    """
    sentences = sent_tokenize(text)
    words = preprocess_text(text)
    word_freq = calculate_word_frequencies(words)
    sentence_scores = calculate_sentence_scores(sentences, word_freq)

    # sorted() is stable, so equally scored sentences keep the order they
    # have in sentence_scores.
    ranked = sorted(sentence_scores.items(), key=lambda item: item[1], reverse=True)
    summary_sentences = [sentence for sentence, _ in ranked[:max(num_sentences, 0)]]

    # Join with plain spaces. TreebankWordDetokenizer is designed for word
    # tokens; applied to whole sentences its regex rules can rewrite the
    # sentence text itself (e.g. "can not" -> "cannot", "$ 5" -> "$5").
    return ' '.join(summary_sentences).strip()

# Demo: summarize a short descriptive passage and print the result.
text = """
    The sun was setting, painting the sky with hues of red and orange.
    Birds were flying back to their nests, chirping their evening songs.
    The cool breeze rustled through the leaves of the trees, bringing a sense of tranquility.
    As the day came to a close, people hurried back home, eager to reunite with their loved ones.
    The distant sounds of laughter and conversations filled the air. Amidst this, nature was preparing for the night,
    with nocturnal creatures starting to stir. The city lights began to flicker, illuminating the urban landscape.
    It was a moment of reflection, a pause in the bustling day. A feeling of gratitude for the beauty that surrounds us.
    The evening had a magical quality, promising serenity and relaxation. The colors in the sky deepened, hinting at the night's arrival.
    Stars began to twinkle, unveiling the vastness of the cosmos. Each moment seemed to slow down, allowing for introspection.
    Life, in its simple moments, was truly remarkable. The world seemed to hush, embracing the peaceful transition from day to night.
    A sense of calmness enveloped everything, a prelude to the stillness of the night. The moon made its appearance, casting a gentle glow.
    It was a reminder of the continuity of time, the ever-moving cycle of nature. The day had left its mark, and now it was time for the night to tell its own story.
"""

summary = summarize(text)
# One print call; sep="\n" puts the heading and the summary on separate lines.
print("Summary:", summary, sep="\n")


Summary:
The world seemed to hush, embracing the peaceful transition from day to night. As the day came to a close, people hurried back home, eager to reunite with their loved ones. The day had left its mark, and now it was time for the night to tell its own story.


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
