<a href="https://colab.research.google.com/github/ragulmurugavel07-byte/NLP-Project/blob/main/NLP_project_15.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ==============================
# TEXT SUMMARIZER (WORKING CODE)
# ==============================

# Step 1: Install & Import
import nltk
import heapq
import re

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

# Step 2: Download required datasets (run once)
nltk.download('punkt')
nltk.download('punkt_tab')   # Required in Python 3.12+
nltk.download('stopwords')

# Step 3: Input Text
text = """
Artificial Intelligence is transforming the world.
It is used in healthcare, education, finance, and many other industries.
AI helps machines learn from data and make decisions.
Machine learning is a subset of artificial intelligence.
Deep learning is a specialized form of machine learning.
AI is shaping the future of technology.
"""

# Step 4: Clean Text
clean_text = re.sub(r'\s+', ' ', text)
clean_text = re.sub('[^a-zA-Z]', ' ', clean_text)
clean_text = clean_text.lower()

# Step 5: Tokenization
words = word_tokenize(clean_text)
sentences = sent_tokenize(text)

# Step 6: Remove Stopwords & Calculate Word Frequency
stop_words = set(stopwords.words('english'))
word_frequencies = {}

for word in words:
    if word not in stop_words:
        word_frequencies[word] = word_frequencies.get(word, 0) + 1

# Step 7: Normalize Frequency
max_freq = max(word_frequencies.values())

for word in word_frequencies:
    word_frequencies[word] /= max_freq

# Step 8: Sentence Scoring
sentence_scores = {}

for sentence in sentences:
    for word in word_tokenize(sentence.lower()):
        if word in word_frequencies:
            sentence_scores[sentence] = sentence_scores.get(sentence, 0) + word_frequencies[word]

# Step 9: Select Top 2 Sentences
summary_sentences = heapq.nlargest(2, sentence_scores, key=sentence_scores.get)

# Step 10: Print Summary
summary = ' '.join(summary_sentences)

print("===== SUMMARY =====")
print(summary)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


===== SUMMARY =====
Deep learning is a specialized form of machine learning. Machine learning is a subset of artificial intelligence.


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
