In [None]:
!pip install gradio
!pip install sumy



In [None]:
import gradio as gr
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.kl import KLSummarizer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from collections import defaultdict
import string
import nltk



In [None]:

# Tokenizer models and the stop-word list used by the extractive summarizers.
nltk.download('punkt')
# Newer NLTK releases load sentence-tokenizer data from 'punkt_tab' instead of
# 'punkt'; fetch both so sent_tokenize works regardless of the installed version.
nltk.download('punkt_tab')
nltk.download('stopwords')



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:

def load_abstractive_summarizer(model_name):
    """Build a summarization pipeline for ``model_name``.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        The summarization pipeline on success. If anything raises while
        loading, the exception message is returned as a ``str`` instead, so
        callers must check the result type before using it.
    """
    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
        loaded_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        summarization_pipeline = pipeline(
            "summarization",
            model=loaded_model,
            tokenizer=loaded_tokenizer,
        )
    except Exception as load_error:
        # Failure is signalled by value (a string), not by raising.
        return str(load_error)
    else:
        return summarization_pipeline



In [None]:

# Load both abstractive models once, up front. Each name ends up bound either
# to a pipeline or to an error-message string if loading failed.
t5_summarizer, bart_summarizer = (
    load_abstractive_summarizer(checkpoint)
    for checkpoint in ("t5-small", "facebook/bart-large-cnn")
)





In [None]:

def llm_summarizer(text):
    """Stand-in for an LLM-backed summarizer.

    The ``text`` argument is currently ignored; a fixed placeholder notice is
    returned until a real implementation is provided.
    """
    placeholder_notice = "LLM summarization is a placeholder. Implement custom LLM here."
    return placeholder_notice



In [None]:

def extractive_summary(text, num_sentences=3):
    """Pick the highest-scoring sentences of ``text`` by word frequency.

    Every non-stop-word, non-punctuation token is counted (case-insensitively)
    across the whole text; a sentence's score is the sum of the counts of the
    tokens it contains. The ``num_sentences`` best sentences are returned.

    Args:
        text: The document to summarize.
        num_sentences: Maximum number of sentences to keep (default 3).

    Returns:
        The selected sentences joined by single spaces, with their original
        casing and in the order they appear in ``text``. Sentences that contain
        no scored word are never selected, so fewer than ``num_sentences``
        sentences (or an empty string) may be returned.
    """
    stop_words = set(stopwords.words('english'))

    def is_content_word(token):
        # word_tokenize can emit multi-character punctuation tokens (for
        # example "``", "''" or "..."), which a plain substring test against
        # string.punctuation would let through; check every character instead.
        if token in stop_words:
            return False
        return not all(char in string.punctuation for char in token)

    # Split sentences on the original text so the summary keeps its casing;
    # lowercasing is applied only to the tokens used for counting.
    sentences = sent_tokenize(text)
    tokens_per_sentence = [
        [token for token in word_tokenize(sentence.lower()) if is_content_word(token)]
        for sentence in sentences
    ]

    word_frequencies = defaultdict(int)
    for tokens in tokens_per_sentence:
        for token in tokens:
            word_frequencies[token] += 1

    sentence_scores = defaultdict(int)
    first_position = {}
    for position, (sentence, tokens) in enumerate(zip(sentences, tokens_per_sentence)):
        if not tokens:
            continue  # nothing to score; such sentences are not candidates
        # Repeated identical sentences share one entry and accumulate score.
        sentence_scores[sentence] += sum(word_frequencies[token] for token in tokens)
        first_position.setdefault(sentence, position)

    best_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:num_sentences]
    # Present the chosen sentences in document order so the summary reads naturally.
    best_sentences.sort(key=first_position.get)

    return ' '.join(best_sentences)



In [None]:

def _sumy_summary(summarizer, text, num_sentences):
    """Run a sumy summarizer over ``text`` and join the chosen sentences with spaces."""
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    selected = summarizer(parser.document, num_sentences)
    return " ".join(str(sentence) for sentence in selected)


def kl_summarizer(text, num_sentences=3):
    """Summarize ``text`` with sumy's ``KLSummarizer``.

    Args:
        text: The document to summarize.
        num_sentences: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences joined by single spaces.
    """
    return _sumy_summary(KLSummarizer(), text, num_sentences)


def luhn_summarizer(text, num_sentences=3):
    """Summarize ``text`` with sumy's ``LuhnSummarizer``.

    Args:
        text: The document to summarize.
        num_sentences: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences joined by single spaces.
    """
    return _sumy_summary(LuhnSummarizer(), text, num_sentences)


def lexrank_summarizer(text, num_sentences=3):
    """Summarize ``text`` with sumy's ``LexRankSummarizer``.

    Args:
        text: The document to summarize.
        num_sentences: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences joined by single spaces.
    """
    return _sumy_summary(LexRankSummarizer(), text, num_sentences)



In [None]:

def calculate_counts(text):
    """Count the words and lines in ``text``.

    Words are whitespace-delimited, so punctuation attached to a word is not
    counted separately. Lines are counted with ``str.splitlines``, so empty
    text has zero lines and a trailing newline does not add an extra line.

    Args:
        text: The string to measure.

    Returns:
        A ``(num_words, num_lines)`` tuple of ints.
    """
    num_words = len(text.split())
    num_lines = len(text.splitlines())
    return num_words, num_lines



In [None]:

def summarize(text, model_type, model_name):
    """Summarize ``text`` with the selected model and report size statistics.

    Args:
        text: The text to summarize.
        model_type: ``"Abstractive"`` or ``"Extractive"``.
        model_name: The model label within that type, e.g. ``"T5"``, ``"BART"``,
            ``"LLM"``, ``"K-Means Clustering"``, ``"KL-Sum"``,
            ``"Luhn Summarizer"`` or ``"LexRank Summarizer"``.

    Returns:
        The summary followed by word/line counts for the original text and the
        summary. On any problem (empty input, unknown or mismatched model, a
        model that failed to load, or an exception while summarizing) a string
        starting with ``"Error:"`` is returned instead; this function does not
        raise.
    """
    try:
        if not text or not text.strip():
            return "Error: Please enter some text to summarize."

        # None means "no branch matched"; an empty string is a valid (if
        # unhelpful) summary and must not be confused with that case.
        summary = None
        if model_type == "Abstractive":
            if model_name == "T5":
                # A str here is the load-failure message, not a pipeline.
                if isinstance(t5_summarizer, str):
                    return f"Error: {t5_summarizer}"
                # truncation=True keeps inputs longer than the model's maximum
                # length from failing inside the pipeline.
                summary = t5_summarizer(text, truncation=True)[0]['summary_text']
            elif model_name == "BART":
                if isinstance(bart_summarizer, str):
                    return f"Error: {bart_summarizer}"
                summary = bart_summarizer(text, truncation=True)[0]['summary_text']
            elif model_name == "LLM":
                summary = llm_summarizer(text)
        elif model_type == "Extractive":
            if model_name == "K-Means Clustering":
                summary = extractive_summary(text)
            elif model_name == "KL-Sum":
                summary = kl_summarizer(text)
            elif model_name == "Luhn Summarizer":
                summary = luhn_summarizer(text)
            elif model_name == "LexRank Summarizer":
                summary = lexrank_summarizer(text)

        if summary is None:
            # Covers a missing selection and a model name left over from the
            # other summarization type.
            return f"Error: Unsupported model '{model_name}' for summarization type '{model_type}'."

        original_words, original_lines = calculate_counts(text)
        summary_words, summary_lines = calculate_counts(summary)

        return f"\n{summary}\n\nOriginal Content - Words: {original_words}, Lines: {original_lines}\nSummary - Words: {summary_words}, Lines: {summary_lines}"

    except Exception as e:
        return f"Error: {str(e)}"


def summarize_text(input_text):
    """Return the summary of ``input_text`` without any ``"Summary:"`` prefix.

    This is the single effective definition: an earlier duplicate that
    prepended ``"Summary:\\n"`` was shadowed by this one and has been removed.
    """
    # NOTE(review): generate_summary is not defined in the visible notebook, so
    # calling this function raises NameError unless it is provided elsewhere —
    # TODO confirm where it comes from, or drop this function if it is unused.
    summary = generate_summary(input_text)
    return summary

In [None]:

# Choices offered by the UI. The strings double as the keys the summarize()
# dispatcher compares against, so they must stay exactly as written.
model_types = [
    "Abstractive",
    "Extractive",
]
abstractive_models = [
    "T5",
    "BART",
    "LLM",
]
extractive_models = [
    "K-Means Clustering",
    "KL-Sum",
    "Luhn Summarizer",
    "LexRank Summarizer",
]

def update_model_options(model_type):
    """Return a Gradio update that repopulates the model dropdown.

    Args:
        model_type: The selected summarization type. ``"Abstractive"`` selects
            the abstractive model list; any other value selects the extractive
            list.

    Returns:
        A ``gr.update`` carrying the new choices and selecting the first of
        them, so the dropdown never keeps a value from the other type's list.
    """
    choices = abstractive_models if model_type == "Abstractive" else extractive_models
    return gr.update(choices=choices, value=choices[0])



In [None]:

# Two-column layout: inputs and model selection on the left, result on the right.
with gr.Blocks() as demo:
    gr.Markdown("## Summarization App: Abstractive & Extractive")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(lines=10, placeholder="Enter the text to summarize")
            model_type = gr.Radio(choices=model_types, label="Choose Summarization Type", value="Abstractive")
            # Preselect a model matching the default type so that clicking the
            # button straight away does not pass None as the model name.
            model_name = gr.Dropdown(choices=abstractive_models, value=abstractive_models[0], label="Choose Model")
            # Swap the dropdown's options whenever the summarization type changes.
            model_type.change(fn=update_model_options, inputs=model_type, outputs=model_name)
            summarize_button = gr.Button("Synthesize")

        with gr.Column():
            output = gr.Textbox(lines=15, label="Synthesize")

    summarize_button.click(summarize, inputs=[text_input, model_type, model_name], outputs=output)

demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e1859326afe506513d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


