In [1]:
!pip install nltk




In [2]:
import nltk
import re
import heapq
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize


In [3]:
nltk.download('punkt')
nltk.download('stopwords')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

Input Article

In [4]:
article_text = """
Artificial Intelligence (AI) is revolutionizing modern industries.
It enables machines to mimic human intelligence such as learning,
problem-solving, and decision-making. AI is widely used in healthcare,
finance, autonomous vehicles, recommendation systems, and cybersecurity.
Despite its benefits, AI raises ethical challenges including bias,
privacy concerns, and job displacement. Responsible AI emphasizes fairness,
transparency, and accountability. The future of AI depends on collaboration
between researchers, governments, and organizations worldwide.
"""


Text Preprocessing

In [5]:
def preprocess_text(text):
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    text = text.lower()
    return text


Word Frequency Calculation

In [6]:
def calculate_word_frequencies(text):
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text)

    word_freq = {}
    for word in words:
        if word not in stop_words:
            if word not in word_freq:
                word_freq[word] = 1
            else:
                word_freq[word] += 1

    max_freq = max(word_freq.values())
    for word in word_freq:
        word_freq[word] /= max_freq

    return word_freq


Sentence Scoring

In [7]:
def score_sentences(sentences, word_freq):
    sentence_scores = {}

    for sentence in sentences:
        for word in word_tokenize(sentence.lower()):
            if word in word_freq:
                if sentence not in sentence_scores:
                    sentence_scores[sentence] = word_freq[word]
                else:
                    sentence_scores[sentence] += word_freq[word]

    return sentence_scores


Generate Coalesced Summary

In [8]:
def generate_summary(text, summary_length=3):
    processed_text = preprocess_text(text)
    sentences = sent_tokenize(text)
    word_freq = calculate_word_frequencies(processed_text)
    sentence_scores = score_sentences(sentences, word_freq)

    summary_sentences = heapq.nlargest(
        summary_length, sentence_scores, key=sentence_scores.get
    )

    summary = " ".join(summary_sentences)
    return summary


Run the Summarizer

In [11]:
import nltk
nltk.download('punkt_tab')

summary = generate_summary(article_text, summary_length=3)

print("üìÑ ORIGINAL ARTICLE:\n")
print(article_text)

print("\nüìù COALESCED SUMMARY:\n")
print(summary)


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


üìÑ ORIGINAL ARTICLE:


Artificial Intelligence (AI) is revolutionizing modern industries.
It enables machines to mimic human intelligence such as learning,
problem-solving, and decision-making. AI is widely used in healthcare,
finance, autonomous vehicles, recommendation systems, and cybersecurity.
Despite its benefits, AI raises ethical challenges including bias,
privacy concerns, and job displacement. Responsible AI emphasizes fairness,
transparency, and accountability. The future of AI depends on collaboration
between researchers, governments, and organizations worldwide.


üìù COALESCED SUMMARY:

Despite its benefits, AI raises ethical challenges including bias,
privacy concerns, and job displacement. AI is widely used in healthcare,
finance, autonomous vehicles, recommendation systems, and cybersecurity. The future of AI depends on collaboration
between researchers, governments, and organizations worldwide.
