In [2]:
!pip --quiet install sumy

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.3/97.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m69.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for breadability (setup.py) ... [?25l[?25hdone
  Building wheel for docopt (setup.py) ... [?25l[?25hdone


In [4]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import string
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer

# Fetch the NLTK data packages this notebook relies on: the punkt / punkt_tab
# tokenizer data and the stop-word corpus read by stopwords.words().
for _nltk_resource in ('punkt', 'stopwords', 'punkt_tab'):
    nltk.download(_nltk_resource)

# Frequency-based Summarization
def frequency_based_summarization(text, sentences_count=3):
    """Summarize ``text`` by keeping its highest-scoring sentences.

    Each token that is neither an English stop word nor pure punctuation is
    weighted by its count divided by the count of the most frequent token.
    A sentence scores the sum of the weights of its tokens.  The
    ``sentences_count`` best sentences are returned in the order in which
    they occur in ``text`` so the summary reads like the source.

    Args:
        text: Plain text to summarize.
        sentences_count: Maximum number of sentences to keep.  Defaults to
            3, the value that was previously hard-coded.  Values below 1
            yield an empty summary.

    Returns:
        The selected sentences joined by single spaces, or ``''`` when the
        text contains no scorable words.
    """
    sentences = sent_tokenize(text)

    stop_words = set(stopwords.words("english"))
    # Drop stop words and tokens made up solely of punctuation characters.
    # A plain ``word not in string.punctuation`` is a substring test, so
    # multi-character tokens such as "``", "''" or "..." would be kept.
    words = [
        word
        for word in word_tokenize(text.lower())
        if word not in stop_words
        and not all(char in string.punctuation for char in word)
    ]

    word_freq = {}
    for word in words:
        word_freq[word] = word_freq.get(word, 0) + 1

    # Nothing to score (empty text, or only stop words / punctuation):
    # return an empty summary instead of letting max() raise ValueError.
    if not word_freq:
        return ''

    max_freq = max(word_freq.values())
    word_freq = {word: freq / max_freq for word, freq in word_freq.items()}

    # Keyed by sentence text.  Dicts keep insertion order, so iterating this
    # mapping later walks the candidate sentences in document order.
    sentence_scores = {}
    for sentence in sentences:
        if sentence in sentence_scores:
            continue  # verbatim repeat of a sentence that is already scored
        score = sum(
            word_freq.get(word, 0) for word in word_tokenize(sentence.lower())
        )
        # Sentences without any weighted word are not candidates.
        if score > 0:
            sentence_scores[sentence] = score

    # sorted() is stable, so equally scored sentences keep document order.
    ranked = sorted(sentence_scores, key=sentence_scores.get, reverse=True)
    selected = set(ranked[:max(sentences_count, 0)])

    return ' '.join(
        sentence for sentence in sentence_scores if sentence in selected
    )

# Shared driver for the sumy-based summarizers below
def _sumy_summarization(text, summarizer, sentences_count):
    """Run a sumy ``summarizer`` over ``text`` and join the chosen sentences.

    Args:
        text: Plain text to summarize; parsed with sumy's English tokenizer.
        summarizer: A sumy summarizer instance, called as
            ``summarizer(document, sentences_count)``.
        sentences_count: Number of sentences requested from the summarizer.

    Returns:
        The sentences returned by the summarizer, converted with ``str`` and
        joined by single spaces.
    """
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    selected = summarizer(parser.document, sentences_count)
    return ' '.join(str(sentence) for sentence in selected)


# LSA Summarization
def lsa_summarization(text, sentences_count=2):
    """Summarize ``text`` with sumy's ``LsaSummarizer``.

    Args:
        text: Plain text to summarize.
        sentences_count: Number of sentences to request (default 2).

    Returns:
        The selected sentences joined by single spaces.
    """
    return _sumy_summarization(text, LsaSummarizer(), sentences_count)


# Luhn Summarization
def luhn_summarization(text, sentences_count=2):
    """Summarize ``text`` with sumy's ``LuhnSummarizer``.

    Args:
        text: Plain text to summarize.
        sentences_count: Number of sentences to request (default 2).

    Returns:
        The selected sentences joined by single spaces.
    """
    return _sumy_summarization(text, LuhnSummarizer(), sentences_count)


# LexRank Summarization
def lex_rank_summarization(text, sentences_count=2):
    """Summarize ``text`` with sumy's ``LexRankSummarizer``.

    Args:
        text: Plain text to summarize.
        sentences_count: Number of sentences to request (default 2).

    Returns:
        The selected sentences joined by single spaces.
    """
    return _sumy_summarization(text, LexRankSummarizer(), sentences_count)

# Entry point: route the text to the requested summarization method
def summarize_text(text, method="frequency", sentences_count=2):
    """Summarize ``text`` with the algorithm named by ``method``.

    Args:
        text: Plain text to summarize.
        method: One of ``"frequency"``, ``"lsa"``, ``"luhn"`` or
            ``"lex_rank"``; compared exactly, so case matters.
        sentences_count: Sentence budget handed to the lsa, luhn and
            lex_rank summarizers.  The frequency method is called without it.

    Returns:
        The summary produced by the chosen function, or the string
        ``"Invalid method selected."`` for any other ``method`` value.
    """
    # The frequency scorer is invoked with the text only.
    if method == "frequency":
        return frequency_based_summarization(text)

    # The remaining methods share one call shape, so pick the function first.
    if method == "lsa":
        chosen_summarizer = lsa_summarization
    elif method == "luhn":
        chosen_summarizer = luhn_summarization
    elif method == "lex_rank":
        chosen_summarizer = lex_rank_summarization
    else:
        return "Invalid method selected."

    return chosen_summarizer(text, sentences_count)

# Example input: a short passage about artificial intelligence.
# The triple quotes open and close on their own lines, so the string begins
# and ends with a newline character.
text = """
Artificial Intelligence (AI) is revolutionizing multiple industries and transforming the way we live and work.
AI technologies, including machine learning, natural language processing, and robotics, are being integrated into applications like healthcare, finance, and transportation.
In healthcare, AI is improving diagnostics and treatment, making processes faster and more accurate. In finance, it helps detect fraudulent transactions and manage risks effectively.
AI-powered self-driving cars are reshaping transportation by reducing accidents and optimizing traffic flow. Despite its benefits, AI poses ethical challenges, such as privacy concerns and potential job displacement.
Governments and organizations are working on policies to address these issues while fostering innovation in AI. The future of AI looks promising, but it requires careful consideration of both its potential and its risks.
"""

# Ask which algorithm to run.  The answer is trimmed and lower-cased so that
# entries such as " LSA " still match the method names summarize_text() expects.
method_choice = input("Choose summarization method (frequency, lsa, luhn, lex_rank): ")
summary = summarize_text(text, method=method_choice.strip().lower())
# NOTE: for an unrecognized method summarize_text() returns the message
# "Invalid method selected.", which is printed and counted like a summary.
print("Summary:")
print(summary)

# Compare input and summary sizes using whitespace-delimited word counts.
original_word_count = len(text.split())
summary_word_count = len(summary.split())

print(f"\nNumber of words in the original text: {original_word_count}")
print(f"Number of words in the summary: {summary_word_count}")



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


Choose summarization method (frequency, lsa, luhn, lex_rank): lex_rank
Summary:
Artificial Intelligence (AI) is revolutionizing multiple industries and transforming the way we live and work. AI technologies, including machine learning, natural language processing, and robotics, are being integrated into applications like healthcare, finance, and transportation.

Number of words in the original text: 122
Number of words in the summary: 35
