In [1]:
!pip install gradio --quiet
!pip install sumy --quiet
!pip install langchain_google_genai --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.1/57.1 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.1/320.1 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.1/11.1 MB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.8/63.8 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [2]:
# Read the value stored under 'GOOGLE_API_KEY' through google.colab's
# `userdata.get` and export it as the GOOGLE_API_KEY environment variable.
# NOTE(review): presumably this is where the Gemini client built later in the
# notebook picks up its credentials, and the google.colab import means this
# cell is expected to run inside Colab only — confirm both.
from google.colab import userdata
import os
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

In [3]:
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
import string
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import BartForConditionalGeneration, BartTokenizer
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Fetch the NLTK resources used below: the sentence/word tokenizer models
# ("punkt", "punkt_tab") and the stop-word lists ("stopwords").
for nltk_resource in ("punkt", "punkt_tab", "stopwords"):
    nltk.download(nltk_resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [5]:
# Frequency-based summarization
def frequency_based_summary(text, max_words):
    """Build an extractive summary by scoring sentences on word frequency.

    Every alphanumeric, non-stop-word token of the lower-cased text is
    counted. A sentence's score is the sum of the counts of its tokens.
    Sentences are taken greedily from the highest score down, stopping at the
    first one that would push the running total past ``max_words`` (lengths
    are measured with ``word_tokenize``, so punctuation tokens count too).

    The selected sentences are returned in the order they appear in ``text``
    rather than in score order, so the summary reads as a coherent excerpt.

    Args:
        text: The text to summarize.
        max_words: Word budget; anything ``int()`` accepts.

    Returns:
        The selected sentences joined by single spaces. This is an empty
        string when even the best-scoring sentence exceeds the budget. On any
        failure the string ``"Error: <message>"`` is returned instead of
        raising.
    """
    try:
        max_words = int(max_words)
        stop_words = set(stopwords.words("english"))

        # Corpus-wide frequency of the meaningful words.
        word_freq = {}
        for word in word_tokenize(text.lower()):
            if word.isalnum() and word not in stop_words:
                word_freq[word] = word_freq.get(word, 0) + 1

        # Keyed by sentence text; dict insertion order is document order,
        # which is what lets the final join restore the original sequence.
        sentence_scores = {}
        for sentence in sent_tokenize(text):
            for word in word_tokenize(sentence.lower()):
                if word in word_freq:
                    sentence_scores[sentence] = sentence_scores.get(sentence, 0) + word_freq[word]

        selected = set()
        word_count = 0
        # sorted() is stable, so equally scored sentences keep document order.
        for sentence in sorted(sentence_scores, key=sentence_scores.get, reverse=True):
            sentence_words = len(word_tokenize(sentence))
            if word_count + sentence_words > max_words:
                break
            selected.add(sentence)
            word_count += sentence_words

        return " ".join(sentence for sentence in sentence_scores if sentence in selected)
    except Exception as e:
        return f"Error: {str(e)}"

# LexRank summarization
def lexrank_summary(text, max_words):
    """Build an extractive summary from the best LexRank-rated sentences.

    The previous version asked sumy for ``len(sentences)`` sentences. sumy
    returns its selection in document order, so that request returned every
    sentence in document order and the word budget simply kept the leading
    sentences: the LexRank ratings never influenced the result.

    This version passes a callable as the sentence count. sumy hands that
    callable the sentence infos sorted by rating (best first), keeps what it
    returns, and restores document order itself. The callable keeps sentences
    until the next one would exceed ``max_words`` (lengths are measured with
    ``word_tokenize``).

    Args:
        text: The text to summarize.
        max_words: Word budget; anything ``int()`` accepts.

    Returns:
        The selected sentences joined by single spaces, in document order.
        On any failure the string ``"Error: <message>"`` is returned instead
        of raising.
    """
    try:
        max_words = int(max_words)
        parser = PlaintextParser.from_string(text, Tokenizer("english"))

        def pick_within_budget(ranked_infos):
            # ranked_infos arrive best-rated first; each exposes `.sentence`.
            kept = []
            word_count = 0
            for info in ranked_infos:
                sentence_words = len(word_tokenize(str(info.sentence)))
                if word_count + sentence_words > max_words:
                    break
                kept.append(info)
                word_count += sentence_words
            return kept

        summarizer = LexRankSummarizer()
        best_sentences = summarizer(parser.document, pick_within_budget)
        return " ".join(str(sentence) for sentence in best_sentences)
    except Exception as e:
        return f"Error: {str(e)}"

# TextRank summarization
def textrank_summary(text, max_words):
    """Build an extractive summary by running PageRank over sentence similarity.

    Each sentence is lower-cased and reduced to its alphanumeric, non-stop-word
    tokens, then vectorized with TF-IDF. The pairwise cosine similarities form
    a weighted graph whose PageRank scores rank the sentences. Sentences are
    taken greedily from the highest score down, stopping at the first one that
    would push the running total past ``max_words`` (lengths are measured with
    ``word_tokenize`` on the original sentence).

    The selected sentences are returned in the order they appear in ``text``
    rather than in score order, so the summary reads as a coherent excerpt.

    Args:
        text: The text to summarize.
        max_words: Word budget; anything ``int()`` accepts.

    Returns:
        The selected sentences joined by single spaces. On any failure the
        string ``"Error: <message>"`` is returned instead of raising.
    """
    try:
        max_words = int(max_words)
        sentences = sent_tokenize(text)
        stop_words = set(stopwords.words("english"))

        clean_sentences = [
            " ".join(
                word
                for word in word_tokenize(sentence.lower())
                if word.isalnum() and word not in stop_words
            )
            for sentence in sentences
        ]

        tfidf_matrix = TfidfVectorizer().fit_transform(clean_sentences)
        similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)
        scores = nx.pagerank(nx.from_numpy_array(similarity_matrix))

        # Rank positions rather than sentence strings so the document order
        # can be restored afterwards; ties on score fall back to the sentence
        # text, as before.
        ranked_positions = sorted(
            range(len(sentences)),
            key=lambda i: (scores[i], sentences[i]),
            reverse=True,
        )

        chosen_positions = []
        word_count = 0
        for position in ranked_positions:
            sentence_words = len(word_tokenize(sentences[position]))
            if word_count + sentence_words > max_words:
                break
            chosen_positions.append(position)
            word_count += sentence_words

        return " ".join(sentences[position] for position in sorted(chosen_positions))
    except Exception as e:
        return f"Error: {str(e)}"

# T5 summarization
# Holds the loaded (tokenizer, model) pair so the weights are read once per
# kernel session instead of on every button click.
_t5_cache = {}


def _load_t5():
    """Return the t5-small (tokenizer, model) pair, loading it on first use only."""
    if "pair" not in _t5_cache:
        _t5_cache["pair"] = (
            T5Tokenizer.from_pretrained("t5-small"),
            T5ForConditionalGeneration.from_pretrained("t5-small"),
        )
    return _t5_cache["pair"]


def t5_summary(text, max_words):
    """Generate an abstractive summary of ``text`` with the t5-small model.

    The input is prefixed with ``"summarize: "`` and truncated to 512 tokens.
    Generation uses beam search with 4 beams.

    Args:
        text: The text to summarize.
        max_words: Passed to ``generate`` as ``max_length``, so it bounds the
            number of generated *tokens*. Anything ``int()`` accepts.

    Returns:
        The decoded summary, or ``"Error: <message>"`` if anything fails.
    """
    try:
        max_words = int(max_words)
        tokenizer, model = _load_t5()

        inputs = tokenizer.encode(
            "summarize: " + text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        summary_ids = model.generate(
            inputs,
            max_length=max_words,
            # A fixed floor of 30 contradicts max_length whenever the caller
            # asks for fewer than 30, so clamp it to the budget.
            min_length=min(30, max_words),
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {str(e)}"

# BART summarization
# Holds the loaded (tokenizer, model) pair so the weights are read once per
# kernel session instead of on every button click.
_bart_cache = {}


def _load_bart():
    """Return the facebook/bart-large-cnn (tokenizer, model) pair, loading it on first use only."""
    if "pair" not in _bart_cache:
        _bart_cache["pair"] = (
            BartTokenizer.from_pretrained("facebook/bart-large-cnn"),
            BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn"),
        )
    return _bart_cache["pair"]


def bart_summary(text, max_words):
    """Generate an abstractive summary of ``text`` with facebook/bart-large-cnn.

    The input is truncated to 1024 tokens. Generation uses beam search with
    4 beams.

    Args:
        text: The text to summarize.
        max_words: Passed to ``generate`` as ``max_length``, so it bounds the
            number of generated *tokens*. Anything ``int()`` accepts.

    Returns:
        The decoded summary, or ``"Error: <message>"`` if anything fails.
    """
    try:
        max_words = int(max_words)
        tokenizer, model = _load_bart()

        inputs = tokenizer.encode(
            text,
            return_tensors="pt",
            max_length=1024,
            truncation=True,
        )
        summary_ids = model.generate(
            inputs,
            max_length=max_words,
            # A fixed floor of 30 contradicts max_length whenever the caller
            # asks for fewer than 30, so clamp it to the budget.
            min_length=min(30, max_words),
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True,
        )
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {str(e)}"

# LLM (Gemini) summarization
def llm_summary(text, max_words):
    """Ask the Gemini chat model for a summary of ``text``.

    The user's text is supplied as the value of a ``{text}`` placeholder.
    Previously the raw text was used as the prompt *template* itself, so any
    ``{`` or ``}`` in the input (code, JSON, ...) was parsed as a template
    variable and either failed or altered the prompt.

    Args:
        text: The text to summarize.
        max_words: Target length named in the instruction; anything ``int()``
            accepts.

    Returns:
        The ``content`` of the model's reply, or ``"Error: <message>"`` if
        anything fails.
    """
    try:
        max_words = int(max_words)
        # NOTE(review): max_output_tokens caps tokens, not words, so a reply
        # that aims for `max_words` words may be cut short — confirm whether
        # some headroom is wanted here.
        llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-flash",
            temperature=0.3,
            max_output_tokens=max_words,
        )
        prompt = ChatPromptTemplate.from_messages([
            ("system", "Summarize this text in {max_words} words:\n\n"),
            ("human", "{text}"),
        ])
        chain = prompt | llm
        result = chain.invoke({"text": text, "max_words": max_words})
        return result.content
    except Exception as e:
        return f"Error: {str(e)}"

# Extractive summarization function
def extractive_summarize(text, method, max_words):
    """Validate the form inputs and run the chosen extractive summarizer.

    Args:
        text: Text to summarize. ``None``, empty and whitespace-only values
            are all treated as missing input.
        method: One of ``"Frequency-based"``, ``"LexRank"`` or ``"TextRank"``.
        max_words: Word budget; anything ``int()`` accepts.

    Returns:
        The summary produced by the selected method, or a user-facing message
        describing what was wrong with the inputs. Never raises.
    """
    # `text.strip()` on None would raise, so check for None/empty first.
    if not text or not text.strip():
        return "Please enter some text to summarize."

    try:
        max_words = int(max_words)
    except (TypeError, ValueError):
        # TypeError covers None, which int() rejects differently from a
        # malformed string.
        return "Please enter a valid number for maximum words."

    if max_words < 1:
        return "Please enter a positive number of words."

    try:
        if method == "Frequency-based":
            return frequency_based_summary(text, max_words)
        elif method == "LexRank":
            return lexrank_summary(text, max_words)
        elif method == "TextRank":
            return textrank_summary(text, max_words)
        return "Invalid method selected"
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Abstractive summarization function
def abstractive_summarize(text, method, max_words):
    """Validate the form inputs and run the chosen abstractive summarizer.

    Args:
        text: Text to summarize. ``None``, empty and whitespace-only values
            are all treated as missing input.
        method: One of ``"T5"``, ``"BART"`` or ``"LLM (Gemini)"``.
        max_words: Length budget; anything ``int()`` accepts.

    Returns:
        The summary produced by the selected method, or a user-facing message
        describing what was wrong with the inputs. Never raises.
    """
    # `text.strip()` on None would raise, so check for None/empty first.
    if not text or not text.strip():
        return "Please enter some text to summarize."

    try:
        max_words = int(max_words)
    except (TypeError, ValueError):
        # TypeError covers None, which int() rejects differently from a
        # malformed string.
        return "Please enter a valid number for maximum words."

    if max_words < 1:
        return "Please enter a positive number of words."

    try:
        if method == "T5":
            return t5_summary(text, max_words)
        elif method == "BART":
            return bart_summary(text, max_words)
        elif method == "LLM (Gemini)":
            return llm_summary(text, max_words)
        return "Invalid method selected"
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Gradio Interface
def _build_summary_tab(title, methods, handler):
    """Add one summarization tab to the Blocks layout currently being built.

    Both tabs share the same controls, so they are created here once instead
    of being copy-pasted.

    Args:
        title: Label shown on the tab.
        methods: Choices offered in the method dropdown. The first one is
            pre-selected; without a default the dropdown submits no method and
            the handler answers "Invalid method selected".
        handler: Callback invoked as ``handler(text, method, max_words)`` when
            the button is clicked; its return value fills the summary box.
    """
    with gr.Tab(title):
        method = gr.Dropdown(choices=methods, value=methods[0], label="Summarization Method")
        words = gr.Textbox(label="Maximum Words", value="100")
        text = gr.Textbox(label="Input Text", lines=5, placeholder="Enter text here")
        output = gr.Textbox(label="Summary", lines=5)
        button = gr.Button("Generate Summary")
        button.click(handler, inputs=[text, method, words], outputs=output)


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Text Summarization App")

    with gr.Tabs():
        _build_summary_tab(
            "Extractive Summarization",
            ["Frequency-based", "LexRank", "TextRank"],
            extractive_summarize,
        )
        _build_summary_tab(
            "Abstractive Summarization",
            ["T5", "BART", "LLM (Gemini)"],
            abstractive_summarize,
        )

demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f211b72f5108e5a6de.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


