In [None]:
#the frequency method

import nltk
nltk.download('punkt')
nltk.download('stopwords')

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
# Sample input for the frequency method: two short paragraphs (separated by a
# blank line) about automatic summarization.
text = '''Automatic summarization is the process of shortening a set of data computationally, to create a subset (a summary) that represents the most important or relevant information within the original content. Artificial intelligence algorithms are commonly developed and employed to achieve this, specialized for different types of data.

Text summarization is usually implemented by natural language processing methods, designed to locate the most informative sentences in a given document. On the other hand, visual content can be summarized using computer vision algorithms.'''


def solve(text):
    """Summarize ``text`` with its three highest-scoring sentences.

    Every token that is not an English stopword and contains at least one
    letter or digit is counted (case-insensitively) across the whole text.
    A sentence's score is the sum of the counts of the distinct counted
    words that occur in it as whole tokens.

    Args:
        text: The text to summarize.

    Returns:
        Up to three sentences joined by single spaces, highest score first
        (ties keep the order in which the sentences were first seen).
        Sentences that contain no counted word are never selected.
    """
    stop_words = set(stopwords.words("english"))

    # Frequency table over the whole text. Pure punctuation tokens such as
    # "," or "." are skipped so they cannot inflate a sentence's score.
    word_counts = {}
    for token in word_tokenize(text):
        token = token.lower()
        if token in stop_words or not any(ch.isalnum() for ch in token):
            continue
        word_counts[token] = word_counts.get(token, 0) + 1

    # Score each sentence by the words it really contains. Matching on whole
    # tokens (not substrings of the raw sentence) keeps short words from being
    # credited when they merely appear inside longer ones.
    sentence_scores = {}
    for sentence in sent_tokenize(text):
        present = {token.lower() for token in word_tokenize(sentence)}
        score = sum(word_counts[token] for token in present if token in word_counts)
        if score:
            # A sentence that occurs more than once accumulates its score.
            sentence_scores[sentence] = sentence_scores.get(sentence, 0) + score

    # Highest score first; sorted() is stable, so ties keep insertion order.
    ranked = sorted(sentence_scores.items(), key=lambda item: item[1], reverse=True)

    # Keep the top 3 sentences for the summary.
    summary = ' '.join(sentence for sentence, _ in ranked[:3])

    return summary.strip()

# Run the frequency-based summarizer on the sample text and show the result.
summary = solve(text)
print(summary)


Automatic summarization is the process of shortening a set of data computationally, to create a subset (a summary) that represents the most important or relevant information within the original content. Text summarization is usually implemented by natural language processing methods, designed to locate the most informative sentences in a given document. Artificial intelligence algorithms are commonly developed and employed to achieve this, specialized for different types of data.


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Latent Semantic Analysis (LSA)

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

def lsa_method(text):
    """Summarize ``text`` with sumy's LSA summarizer, requesting one sentence.

    The text is parsed as English plain text; the sentences returned by the
    summarizer are converted to strings and joined with single spaces.
    """
    source = PlaintextParser.from_string(text, Tokenizer("english"))
    rank_sentences = LsaSummarizer()
    chosen = rank_sentences(source.document, 1)
    return ' '.join(map(str, chosen))

# Sample passage for the LSA demo. The bracketed [n] citation markers and the
# missing space after "methods." are part of the input as given; the printed
# result shows a marker ending up at the start of an extracted sentence.
text = ''' Text summarization is usually implemented by natural language processing methods.It is designed to locate the most informative sentences in a given document.[1] On the other hand, visual content can be summarized using computer vision algorithms. Image summarization is the subject of ongoing research; existing approaches typically attempt to display the most representative images from a given image collection, or generate a video that only includes the most important content from the entire collection.[2][3][4] Video summarization algorithms identify and extract from the original video content the most important frames (key-frames), and/or the most important video segments (key-shots), normally in a temporally ordered fashion.[5][6][7][8] Video summaries simply retain a carefully selected subset of the original video frames and, therefore, are not identical to the output of video synopsis algorithms, where new video frames are being synthesized based on the original video content'''
# Summarize the passage with LSA and print the result.
final = lsa_method(text)
print(final)


[1] On the other hand, visual content can be summarized using computer vision algorithms.


In [None]:
#luhn method
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer

def lunh_method(text):
  """Summarize ``text`` with sumy's Luhn summarizer, requesting one sentence.

  Returns the selected sentence(s) as a single space-joined string.
  """
  parsed = PlaintextParser.from_string(text, Tokenizer("english"))
  luhn = LuhnSummarizer()
  pieces = []
  for picked in luhn(parsed.document, 1):
    pieces.append(str(picked))
  return ' '.join(pieces)

# Sample passage for the Luhn demo. The bracketed [n] citation markers and the
# missing space after "methods." are part of the input as given.
text = ''' Text summarization is usually implemented by natural language processing methods.It is designed to locate the most informative sentences in a given document.[1] On the other hand, visual content can be summarized using computer vision algorithms. Image summarization is the subject of ongoing research. existing approaches typically attempt to display the most representative images from a given image collection, or generate a video that only includes the most important content from the entire collection.[2][3][4] Video summarization algorithms identify and extract from the original video content the most important frames (key-frames), and/or the most important video segments (key-shots), normally in a temporally ordered fashion.[5][6][7][8] Video summaries simply retain a carefully selected subset of the original video frames and, therefore, are not identical to the output of video synopsis algorithms, where new video frames are being synthesized based on the original video content'''
# Summarize the passage with the Luhn method and print the result.
final = lunh_method(text)
print(final)

[2][3][4] Video summarization algorithms identify and extract from the original video content the most important frames (key-frames), and/or the most important video segments (key-shots), normally in a temporally ordered fashion.


In [None]:
#lex rank method
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
def sumy_method(text):
  """Summarize ``text`` with sumy's LexRank summarizer, requesting four sentences.

  Args:
    text: The text to summarize; parsed as English plain text.

  Returns:
    The sentences returned by the summarizer, converted to strings and
    joined by single spaces. An empty string when the summarizer returns
    no sentences.
  """
  parser = PlaintextParser.from_string(text, Tokenizer("english"))
  summarizer = LexRankSummarizer()
  summary = summarizer(parser.document, 4)
  # Join once, after the loop over the summary: this also defines the result
  # for an empty summary instead of leaving it unassigned.
  return ' '.join(str(sentence) for sentence in summary)
# Sample passage for the LexRank demo. The bracketed [n] citation markers and
# the missing space after "methods." are part of the input as given.
text = ''' Text summarization is usually implemented by natural language processing methods.It is designed to locate the most informative sentences in a given document.[1] On the other hand, visual content can be summarized using computer vision algorithms. Image summarization is the subject of ongoing research. existing approaches typically attempt to display the most representative images from a given image collection, or generate a video that only includes the most important content from the entire collection.[2][3][4] Video summarization algorithms identify and extract from the original video content the most important frames (key-frames), and/or the most important video segments (key-shots), normally in a temporally ordered fashion.[5][6][7][8] Video summaries simply retain a carefully selected subset of the original video frames and, therefore, are not identical to the output of video synopsis algorithms, where new video frames are being synthesized based on the original video content'''
# Summarize the passage with LexRank and print the result.
final = sumy_method(text)
print(final)

Text summarization is usually implemented by natural language processing methods.It is designed to locate the most informative sentences in a given document. [1] On the other hand, visual content can be summarized using computer vision algorithms. Image summarization is the subject of ongoing research. [2][3][4] Video summarization algorithms identify and extract from the original video content the most important frames (key-frames), and/or the most important video segments (key-shots), normally in a temporally ordered fashion.


In [None]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
!pip install sumy


In [None]:
# Extractive summarization: choose the method by name at the input prompt

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords

import sumy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer

def frequency(text,lines):
    """Summarize ``text`` by picking its highest-scoring sentences.

    Every token that is not an English stopword and contains at least one
    letter or digit is counted (case-insensitively) across the whole text.
    A sentence's score is the sum of the counts of the distinct counted
    words that occur in it as whole tokens.

    Args:
        text: The text to summarize.
        lines: Maximum number of sentences to keep; values below 1 give an
            empty summary.

    Returns:
        The selected sentences joined by single spaces, highest score first
        (ties keep the order in which the sentences were first seen).
        Sentences that contain no counted word are never selected.
    """
    stop_words = set(stopwords.words("english"))

    # Frequency table over the whole text. Pure punctuation tokens such as
    # "," or "." are skipped so they cannot inflate a sentence's score.
    word_counts = {}
    for token in word_tokenize(text):
        token = token.lower()
        if token in stop_words or not any(ch.isalnum() for ch in token):
            continue
        word_counts[token] = word_counts.get(token, 0) + 1

    # Score each sentence by the words it really contains. Matching on whole
    # tokens (not substrings of the raw sentence) keeps short words from being
    # credited when they merely appear inside longer ones.
    sentence_scores = {}
    for sentence in sent_tokenize(text):
        present = {token.lower() for token in word_tokenize(sentence)}
        score = sum(word_counts[token] for token in present if token in word_counts)
        if score:
            # A sentence that occurs more than once accumulates its score.
            sentence_scores[sentence] = sentence_scores.get(sentence, 0) + score

    # Highest score first; sorted() is stable, so ties keep insertion order.
    ranked = sorted(sentence_scores.items(), key=lambda item: item[1], reverse=True)

    # Keep the requested number of sentences. Clamping at zero stops a
    # negative count from slicing from the end of the ranking.
    keep = max(lines, 0)
    summary = ' '.join(sentence for sentence, _ in ranked[:keep])

    return summary.strip()

# Latent Semantic Analysis (LSA)

def lsa(text,lines):
    """Summarize ``text`` with sumy's LSA summarizer.

    ``lines`` is passed to the summarizer as the requested sentence count.
    The returned sentences are converted to strings and joined with single
    spaces.
    """
    source = PlaintextParser.from_string(text, Tokenizer("english"))
    rank_sentences = LsaSummarizer()
    chosen = rank_sentences(source.document, lines)
    return ' '.join(map(str, chosen))

#luhn method

def lunh(text,lines):
  """Summarize ``text`` with sumy's Luhn summarizer.

  ``lines`` is passed to the summarizer as the requested sentence count.
  Returns the selected sentences as one space-joined string.
  """
  parsed = PlaintextParser.from_string(text, Tokenizer("english"))
  luhn = LuhnSummarizer()
  pieces = []
  for picked in luhn(parsed.document, lines):
    pieces.append(str(picked))
  return ' '.join(pieces)

#lex rank method
def lexrank(text,lines):
  """Summarize ``text`` with sumy's LexRank summarizer.

  Args:
    text: The text to summarize; parsed as English plain text.
    lines: Sentence count passed to the summarizer.

  Returns:
    The sentences returned by the summarizer, converted to strings and
    joined by single spaces. An empty string when the summarizer returns
    no sentences.
  """
  parser = PlaintextParser.from_string(text, Tokenizer("english"))
  summarizer = LexRankSummarizer()
  summary = summarizer(parser.document, lines)
  # Join once, after the loop over the summary: this also defines the result
  # for an empty summary instead of leaving it unassigned.
  return ' '.join(str(sentence) for sentence in summary)

# Passage summarized by whichever method is chosen at the prompt. The bracketed
# [n] citation markers and the missing space after "methods." are part of the
# input as given.
text = ''' Text summarization is usually implemented by natural language processing methods.It is designed to locate the most informative sentences in a given document.[1] On the other hand, visual content can be summarized using computer vision algorithms. Image summarization is the subject of ongoing research. existing approaches typically attempt to display the most representative images from a given image collection, or generate a video that only includes the most important content from the entire collection.[2][3][4] Video summarization algorithms identify and extract from the original video content the most important frames (key-frames), and/or the most important video segments (key-shots), normally in a temporally ordered fashion.[5][6][7][8] Video summaries simply retain a carefully selected subset of the original video frames and, therefore, are not identical to the output of video synopsis algorithms, where new video frames are being synthesized based on the original video content'''

# Dispatch table: name typed at the prompt -> summarizer function.
methods = {
    "frequency":frequency,
    "lsa":lsa,
    "luhn":lunh,
    "lexrank":lexrank
}
# Accept the method name regardless of surrounding whitespace or capitalisation.
method_name = input("Enter method : ").strip().lower()

# A non-numeric answer is reported below instead of ending the cell with a traceback.
try:
    lines = int(input("Enter number of lines: "))
except ValueError:
    lines = None

if method_name not in methods:
    print("Invalid method name.")
elif lines is None or lines < 1:
    print("Invalid number of lines.")
else:
    final = methods[method_name](text, lines)
    print(final)

Enter method : lsa
Enter number of lines: 1
[1] On the other hand, visual content can be summarized using computer vision algorithms.
