In [None]:
!pip install gradio

In [None]:
!pip install sumy

In [None]:
pip install langchain[google]

In [None]:
from google.colab import userdata
import os
# Expose the Gemini API key stored in Colab's secret manager to the process
# environment, where the Google GenAI client is expected to look for it.
os.environ.update(GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY'))
# os.environ["HUGGINGFACEHUB_API_TOKEN"] = userdata.get('HUGGINGFACEHUB_API_TOKEN')

In [None]:
%pip install --upgrade --quiet tiktoken langchain langgraph beautifulsoup4 langchain langchain-google-genai langchain-huggingface

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.2 MB[0m [31m2.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━[0m [32m0.9/1.2 MB[0m [31m13.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.0/125.0 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

In [None]:
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from transformers import T5ForConditionalGeneration, T5Tokenizer, BartForConditionalGeneration, BartTokenizer

In [None]:
# Download the NLTK data used by the tokenizers and the stop-word filter.
# Newer NLTK releases load the sentence tokenizer from "punkt_tab" rather than
# the pickled "punkt" package, so both are requested; on an older NLTK the
# unknown "punkt_tab" id is reported by the downloader without raising.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("stopwords")

In [None]:
# Functions for Extractive Summarization Methods
def frequency_summarize(text, max_words):
    """Summarize ``text`` by keeping its three highest-scoring sentences.

    A sentence's score is the sum of the document-wide frequencies of its
    tokens, ignoring English stop words and pure-punctuation tokens. The top
    three sentences (highest score first) are joined with spaces and the result
    is cut to at most ``max_words`` whitespace-separated words.

    Args:
        text: The text to summarize.
        max_words: Maximum number of words in the summary. Anything ``int()``
            accepts is allowed, so a float such as ``50.0`` works too.

    Returns:
        The summary string; empty when ``text`` contains no sentences.
    """
    from collections import Counter

    # Slicing with a float raises TypeError, so normalise the limit up front.
    word_limit = int(max_words)
    stop_words = set(stopwords.words("english"))

    # Count lower-cased tokens so the lookup below (also lower-cased) hits the
    # same keys; the previous mixed-case keys made capitalised words score 0.
    # Tokens without any alphanumeric character (",", ".", ...) are skipped so
    # punctuation-heavy sentences are not favoured.
    word_freq = Counter(
        token.lower()
        for token in word_tokenize(text)
        if token.lower() not in stop_words and any(ch.isalnum() for ch in token)
    )

    # Keyed by sentence text, so repeated identical sentences collapse to one.
    sentence_scores = {
        sentence: sum(word_freq.get(token.lower(), 0) for token in word_tokenize(sentence))
        for sentence in sent_tokenize(text)
    }

    ranked = sorted(sentence_scores.items(), key=lambda item: item[1], reverse=True)
    summary = ' '.join(sentence for sentence, _score in ranked[:3])

    summary_words = summary.split()
    if len(summary_words) > word_limit:
        summary = ' '.join(summary_words[:word_limit])
    return summary

def _sumy_summarize(summarizer, text, max_words, sentence_count=3):
    """Run a sumy ``summarizer`` over ``text`` and cap the result at ``max_words`` words.

    Args:
        summarizer: A sumy summarizer instance, called as
            ``summarizer(document, sentence_count)``.
        text: The text to summarize.
        max_words: Maximum number of whitespace-separated words to return.
            Anything ``int()`` accepts is allowed (e.g. ``50.0``).
        sentence_count: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences joined by single spaces, truncated to
        ``max_words`` words when longer.
    """
    # Slicing with a float raises TypeError, so normalise the limit first.
    word_limit = int(max_words)
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    summary = ' '.join(str(sentence) for sentence in summarizer(parser.document, sentence_count))
    summary_words = summary.split()
    if len(summary_words) <= word_limit:
        return summary
    return ' '.join(summary_words[:word_limit])


def lsa_summarize(text, max_words):
    """Summarize ``text`` with sumy's LSA summarizer (3 sentences, at most ``max_words`` words)."""
    return _sumy_summarize(LsaSummarizer(), text, max_words)


def lexrank_summarize(text, max_words):
    """Summarize ``text`` with sumy's LexRank summarizer (3 sentences, at most ``max_words`` words)."""
    return _sumy_summarize(LexRankSummarizer(), text, max_words)


def luhn_summarize(text, max_words):
    """Summarize ``text`` with sumy's Luhn summarizer (3 sentences, at most ``max_words`` words)."""
    return _sumy_summarize(LuhnSummarizer(), text, max_words)


In [None]:
# Abstractive Summarization Methods
# Load each pretrained model together with its tokenizer once, at cell
# execution time, so the summarize functions below can reuse them per request.
t5_model, t5_tokenizer = (
    T5ForConditionalGeneration.from_pretrained("t5-small"),
    T5Tokenizer.from_pretrained("t5-small"),
)

bart_model, bart_tokenizer = (
    BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn"),
    BartTokenizer.from_pretrained("facebook/bart-large-cnn"),
)

def t5_summarize(text, max_words):
    """Generate an abstractive summary of ``text`` with the module-level T5 model.

    The input is prefixed with ``"summarize: "`` and truncated to 512 tokens.
    Generation uses 4-beam search with early stopping.

    Args:
        text: The text to summarize.
        max_words: Passed to ``generate`` as ``max_length``. Note that this
            limit is applied to generated tokens, not whitespace-separated
            words. Anything ``int()`` accepts is allowed (e.g. ``50.0``).

    Returns:
        The decoded summary with special tokens removed.
    """
    # generate() expects integer lengths; a UI number field may hand over a float.
    max_len = max(int(max_words), 1)
    # The fixed minimum of 40 is only feasible when the maximum is at least 40;
    # for shorter limits fall back to half the maximum.
    min_len = 40 if max_len >= 40 else max_len // 2
    inputs = t5_tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=512, truncation=True)
    summary_ids = t5_model.generate(
        inputs,
        max_length=max_len,
        min_length=min_len,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

def bart_summarize(text, max_words):
    """Generate an abstractive summary of ``text`` with the module-level BART model.

    The input is truncated to 1024 tokens. Generation uses 4-beam search with
    early stopping.

    Args:
        text: The text to summarize.
        max_words: Passed to ``generate`` as ``max_length``. Note that this
            limit is applied to generated tokens, not whitespace-separated
            words. Anything ``int()`` accepts is allowed (e.g. ``50.0``).

    Returns:
        The decoded summary with special tokens removed.
    """
    # generate() expects integer lengths; a UI number field may hand over a float.
    max_len = max(int(max_words), 1)
    # The fixed minimum of 40 is only feasible when the maximum is at least 40;
    # for shorter limits fall back to half the maximum.
    min_len = 40 if max_len >= 40 else max_len // 2
    inputs = bart_tokenizer.encode(text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = bart_model.generate(
        inputs,
        max_length=max_len,
        min_length=min_len,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# LLM Summarizer
def load_llm(model="gemini-1.5-pro"):
    """Create a ``ChatGoogleGenerativeAI`` chat client for the given model name.

    The client is configured with temperature 0, no token limit, no timeout
    and up to two retries.
    """
    client_options = dict(temperature=0, max_tokens=None, timeout=None, max_retries=2)
    return ChatGoogleGenerativeAI(model=model, **client_options)

def get_prompt_template():
    """Build the two-message chat prompt used for LLM summarization.

    The template expects two variables: ``num_words`` (target summary length)
    and ``context`` (the text to summarize).
    """
    system_message = ("system", "Summarize this in {num_words} words:\n\n")
    human_message = ("human", "{context}")
    return ChatPromptTemplate.from_messages([system_message, human_message])

def llm_summarize(text, model="gemini-1.5-pro", num_words=50):
    """Summarize ``text`` with a Gemini chat model.

    Args:
        text: The text to summarize; sent as the human message.
        model: Model name forwarded to ``load_llm``.
        num_words: Target summary length inserted into the system prompt.

    Returns:
        The ``content`` of the model's response message.
    """
    # A whole-number float (e.g. 50.0 from a numeric UI field) would otherwise
    # be rendered into the prompt as "50.0 words".
    if isinstance(num_words, float) and num_words.is_integer():
        num_words = int(num_words)
    llm = load_llm(model)
    chain = get_prompt_template() | llm
    response = chain.invoke({"context": text, "num_words": num_words})
    return response.content

In [None]:
import gradio as gr

# The summarization functions used below (frequency_summarize, lsa_summarize,
# lexrank_summarize, luhn_summarize, t5_summarize, bart_summarize and
# llm_summarize) are defined in the cells above; run those cells first.

def word_count(text):
    """Return the number of whitespace-separated words in ``text``."""
    return sum(1 for _word in text.split())

def _run_summarizer(summarizers, text, method, max_words):
    """Validate the UI inputs, run the selected summarizer and add word counts.

    Args:
        summarizers: Mapping of dropdown label -> callable ``(text, max_words)``.
        text: Text entered by the user.
        method: Dropdown selection; may be ``None`` when nothing is selected.
        max_words: Value of the "Max Words" field.

    Returns:
        A 3-tuple ``(summary_or_message, input_word_count, output_word_count)``,
        matching the three output components wired to each button.
    """
    if not text or not text.strip():
        return "Please enter text to summarize.", 0, 0
    summarize = summarizers.get(method)
    if summarize is None:
        # Always return three values: the click handlers have three outputs.
        return "Invalid method selected.", word_count(text), 0
    # NOTE(review): presumably a cleared number field arrives as None - confirm.
    if max_words is None or max_words <= 0:
        return "Max Words must be a positive number.", word_count(text), 0
    summary = summarize(text, int(max_words))
    return summary, word_count(text), word_count(summary)


with gr.Blocks(title="Summarizer App") as demo:
    gr.Markdown("# Summarizer App")

    with gr.Tabs():
        # Extractive summarization tab
        with gr.TabItem("Extractive Summarization"):
            extractive_method_choice = gr.Dropdown(choices=["Frequency", "LSA", "LexRank", "Luhn"], label="Select Extractive Method")
            # precision=0 asks Gradio to round to and deliver an integer.
            extractive_max_words = gr.Number(label="Max Words", value=50, precision=0)

            with gr.Row():
                # Left column for input text
                with gr.Column(scale=1):
                    extractive_input_text = gr.Textbox(label="Input Text", lines=10, placeholder="Enter text to summarize")

                # Right column for output text and word counts
                with gr.Column(scale=1):
                    extractive_output_text = gr.Textbox(label="Extractive Summary", placeholder="Summary will appear here")
                    input_word_count = gr.Textbox(label="Input Word Count", value="0", interactive=False)
                    output_word_count = gr.Textbox(label="Output Word Count", value="0", interactive=False)

            def extractive_summarize(text, method, max_words):
                """Run the chosen extractive method; returns (summary, input words, output words)."""
                # Built per call so the lookup always sees the current definitions.
                summarizers = {
                    "Frequency": frequency_summarize,
                    "LSA": lsa_summarize,
                    "LexRank": lexrank_summarize,
                    "Luhn": luhn_summarize,
                }
                return _run_summarizer(summarizers, text, method, max_words)

            extractive_generate_button = gr.Button("Generate Extractive Summary")
            extractive_generate_button.click(
                fn=extractive_summarize,
                inputs=[extractive_input_text, extractive_method_choice, extractive_max_words],
                outputs=[extractive_output_text, input_word_count, output_word_count]
            )

        # Abstractive summarization tab
        with gr.TabItem("Abstractive Summarization"):
            abstractive_method_choice = gr.Dropdown(choices=["T5", "BART", "LLM (Gemini-1.5)"], label="Select Abstractive Method")
            # precision=0 asks Gradio to round to and deliver an integer.
            abstractive_max_words = gr.Number(label="Max Words", value=50, precision=0)

            with gr.Row():
                # Left column for input text
                with gr.Column(scale=1):
                    abstractive_input_text = gr.Textbox(label="Input Text", lines=10, placeholder="Enter text to summarize")

                # Right column for output text and word counts
                with gr.Column(scale=1):
                    abstractive_output_text = gr.Textbox(label="Abstractive Summary", placeholder="Summary will appear here")
                    input_word_count_abstractive = gr.Textbox(label="Input Word Count", value="0", interactive=False)
                    output_word_count_abstractive = gr.Textbox(label="Output Word Count", value="0", interactive=False)

            def abstractive_summarize(text, method, max_words):
                """Run the chosen abstractive method; returns (summary, input words, output words)."""
                # Built per call so the lookup always sees the current definitions.
                summarizers = {
                    "T5": t5_summarize,
                    "BART": bart_summarize,
                    "LLM (Gemini-1.5)": lambda body, limit: llm_summarize(body, model="gemini-1.5-pro", num_words=limit),
                }
                return _run_summarizer(summarizers, text, method, max_words)

            abstractive_generate_button = gr.Button("Generate Abstractive Summary")
            abstractive_generate_button.click(
                fn=abstractive_summarize,
                inputs=[abstractive_input_text, abstractive_method_choice, abstractive_max_words],
                outputs=[abstractive_output_text, input_word_count_abstractive, output_word_count_abstractive]
            )

demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://60e2d00738b933f2d9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


