In [None]:
# Use NLTK to create an extractive summarizer by selecting key sentences from the text based on word frequency.

import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist
import heapq
import re

# Fetch the NLTK data this notebook needs: the Punkt sentence-tokenizer models and
# the English stop-word list.
# Recent NLTK releases (3.9+) load Punkt from the 'punkt_tab' resource, while older
# releases read 'punkt'. Both are requested so tokenization works on either; a resource
# the installed version does not know is reported by the downloader without raising.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Sample text data
text = """Machine learning is a field of artificial intelligence (AI) that enables computers to learn from data without being explicitly programmed. 
It has applications in various fields such as healthcare, finance, marketing, and agriculture. Machine learning algorithms are classified into supervised, 
unsupervised, and reinforcement learning."""

# Number of top-scoring sentences kept in the extractive summary.
NUM_SUMMARY_SENTENCES = 2


def normalized_tokens(raw_text):
    """Lowercase ``raw_text``, strip punctuation, and split it into word tokens.

    The same normalization is used for the frequency table and for sentence
    scoring, so a word counted in one is always recognisable in the other.
    """
    return word_tokenize(re.sub(r'[^\w\s]', '', raw_text.lower()))


# Split into sentences and collapse internal whitespace: the triple-quoted sample
# contains line breaks in the middle of sentences that would otherwise be carried
# into the printed summary.
sentences = [' '.join(sentence.split()) for sentence in sent_tokenize(text)]
stop_words = set(stopwords.words("english"))
words = normalized_tokens(text)

# Compute word frequencies over the non-stop-word tokens
word_frequencies = FreqDist(word for word in words if word not in stop_words)

# Score each sentence as the sum of the frequencies of its words. Stop words are
# absent from `word_frequencies`, so they contribute nothing. Sentences with no
# scored word are left out, and a repeated sentence is scored once.
sentence_scores = {}
for sentence in sentences:
    score = sum(word_frequencies.get(word, 0) for word in normalized_tokens(sentence))
    if score:
        sentence_scores[sentence] = score

# Pick the highest-scoring sentences. nlargest returns them ranked by score, so
# re-sort by position in `sentences` to keep the summary in document order.
top_sentences = heapq.nlargest(NUM_SUMMARY_SENTENCES, sentence_scores, key=sentence_scores.get)
summary_sentences = sorted(top_sentences, key=sentences.index)
summary = ' '.join(summary_sentences)
print("Extractive Summary:\n", summary)


ModuleNotFoundError: No module named 'nltk'

In [None]:
# Use Hugging Face Transformers to create an abstractive summarizer, generating new sentences that summarize the text.

from transformers import pipeline

# Load a pre-trained summarization model.
# The checkpoint is named explicitly rather than left to the pipeline's implicit
# default, so the notebook keeps producing the same summary if that default changes
# (this is the checkpoint the summarization pipeline falls back to when none is given).
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)

# Example text for summarization
text = """Machine learning is a field of artificial intelligence (AI) that enables computers to learn from data without being explicitly programmed. 
It has applications in various fields such as healthcare, finance, marketing, and agriculture. Machine learning algorithms are classified into supervised, 
unsupervised, and reinforcement learning."""

# Generate abstractive summary.
# do_sample=False requests deterministic decoding; min_length/max_length bound the
# length of the generated summary. The pipeline returns a list with one dict per input.
abstractive_summary = summarizer(text, max_length=50, min_length=25, do_sample=False)[0]['summary_text']
print("Abstractive Summary:\n", abstractive_summary)


In [None]:
# Print the two summaries one after the other so their approaches can be compared.
# Relies on `summary` and `abstractive_summary` produced by the earlier cells.

for heading, body in (
    ("Extractive Summary:\n", summary),
    ("\nAbstractive Summary:\n", abstractive_summary),
):
    print(heading, body)

# Extractive: the output is made of sentences lifted verbatim from the original text.
# Abstractive: the output is newly generated wording that condenses the main ideas.


In [None]:
# Use ROUGE Score to evaluate the quality of summaries by comparing them with a reference summary.

from rouge_score import rouge_scorer

# Define the reference summary; the candidates are the summaries produced by the
# earlier cells (`summary` and `abstractive_summary`).
reference_summary = """Machine learning is a field of AI enabling computers to learn from data with diverse applications."""
extractive_summary = summary

# Initialize ROUGE scorer (unigram, bigram and longest-common-subsequence variants, with stemming)
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Calculate ROUGE scores. The scorer takes the reference (target) first and the
# candidate (prediction) second.
extractive_rouge = scorer.score(reference_summary, extractive_summary)
abstractive_rouge = scorer.score(reference_summary, abstractive_summary)

print("Extractive Summary ROUGE Score:\n", extractive_rouge)
print("\nAbstractive Summary ROUGE Score:\n", abstractive_rouge)


In [None]:
# Adjust the length of summaries by modifying parameters, such as the number of sentences in extractive summarization or length constraints in abstractive summarization.

# Extractive summarization: change the number of sentences
num_sentences = 3
# nlargest returns the winners ranked by score; re-sort them by their position in
# `sentences` so the summary reads in the same order as the source text.
summary_sentences = sorted(
    heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get),
    key=sentences.index,
)
adjusted_summary = ' '.join(summary_sentences)
print("Adjusted Extractive Summary:\n", adjusted_summary)

# Abstractive summarization: adjust min and max length of the generated summary
adjusted_abstractive_summary = summarizer(text, max_length=60, min_length=30, do_sample=False)[0]['summary_text']
print("Adjusted Abstractive Summary:\n", adjusted_abstractive_summary)
