In [1]:
!pip install gradio --quiet

import gradio as gr

# Define your summarization functions
def extractive_summarization_frequency(txt, num_sentences=2):
    """Summarize *txt* by picking its highest-scoring sentences.

    Each sentence is scored by summing the normalized frequencies of the
    non-stopword, alphabetic words it contains; sentences of 30 or more
    space-separated tokens are never selected.

    Args:
        txt: The text to summarize. Literal ``\\n`` sequences are treated as
            line breaks.
        num_sentences: How many sentences to keep (defaults to 2).

    Returns:
        The selected sentences joined by single spaces, highest score first,
        or an empty string when there is nothing to score.
    """
    # Blank input has no sentences to rank; return before loading NLTK.
    if not txt or not txt.strip():
        return ""

    import heapq
    from collections import Counter

    import nltk
    from nltk.corpus import stopwords
    from nltk.tokenize import sent_tokenize, word_tokenize

    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)

    def content_words(fragment):
        # Lower-cased alphabetic tokens only (drops punctuation and numbers).
        return [token.lower() for token in word_tokenize(fragment) if token.isalpha()]

    # Turn user-typed "\n" escape sequences into real line breaks.
    text = txt.replace("\\n", "\n")
    sentences = sent_tokenize(text)

    stop_words = set(stopwords.words('english'))
    counts = Counter(word for word in content_words(text) if word not in stop_words)
    if not counts:
        # Only stopwords/punctuation: nothing to normalize or score.
        return ""

    # Normalize so the most frequent word has weight 1.0.
    max_frequency = max(counts.values())
    weights = {word: count / max_frequency for word, count in counts.items()}

    sentence_scores = {}
    for sentence in sentences:
        # Only consider sentences with fewer than 30 space-separated tokens.
        if len(sentence.split(' ')) >= 30:
            continue
        for word in content_words(sentence):
            if word in weights:
                sentence_scores[sentence] = sentence_scores.get(sentence, 0.0) + weights[word]

    best = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    return " ".join(best)


def extractive_summarization_tfidf(txt, num_sentences=2):
    """Summarize *txt* by keeping the sentences with the largest TF-IDF mass.

    Every sentence is treated as one document; a sentence's score is the sum
    of its TF-IDF weights.

    Args:
        txt: The text to summarize. Literal ``\\n`` sequences are treated as
            line breaks.
        num_sentences: How many sentences to keep (defaults to 2).

    Returns:
        The selected sentences joined by single spaces, ordered from lower to
        higher score, or an empty string when there is nothing to score.
    """
    # Nothing to summarize; also avoids `[-0:]` selecting every sentence.
    if not txt or not txt.strip() or num_sentences <= 0:
        return ""

    import nltk
    import numpy as np
    from nltk.tokenize import sent_tokenize
    from sklearn.feature_extraction.text import TfidfVectorizer

    nltk.download('punkt', quiet=True)

    # Turn user-typed "\n" escape sequences into real line breaks.
    text = txt.replace("\\n", "\n")
    sentences = sent_tokenize(text)

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(sentences)
    except ValueError:
        # Raised when no sentence contributes a usable term (empty vocabulary).
        return ""

    # One score per sentence: the row sum of its TF-IDF weights.
    sentence_scores = np.sum(tfidf_matrix.toarray(), axis=1)

    # argsort is ascending, so the tail holds the best-scoring sentences.
    top_indices = np.argsort(sentence_scores)[-num_sentences:]
    return " ".join(sentences[i] for i in top_indices)


def extractive_summarization_lsa(txt, num_sentences=2):
    """Summarize *txt* with sumy's LSA summarizer.

    Args:
        txt: The text to summarize. Literal ``\\n`` sequences are treated as
            line breaks.
        num_sentences: How many sentences to request (defaults to 2).

    Returns:
        The sentences returned by the summarizer, joined by single spaces,
        or an empty string for blank input.
    """
    # Blank input has nothing to summarize; return before loading sumy.
    if not txt or not txt.strip():
        return ""

    import nltk
    from sumy.nlp.tokenizers import Tokenizer
    from sumy.parsers.plaintext import PlaintextParser
    from sumy.summarizers.lsa import LsaSummarizer

    # sumy's Tokenizer loads NLTK's punkt data; fetch it here so this method
    # works even when no other summarizer has run first.
    nltk.download('punkt', quiet=True)

    # Turn user-typed "\n" escape sequences into real line breaks.
    text = txt.replace("\\n", "\n")

    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    summarizer = LsaSummarizer()
    selected = summarizer(parser.document, num_sentences)

    return " ".join(str(sentence) for sentence in selected)


# Summarization pipelines keyed by model name, so the model is loaded only
# once per process instead of on every request.
_bart_pipeline_cache = {}


def _load_bart_pipeline(model_name="facebook/bart-large-cnn"):
    """Return the summarization pipeline for *model_name*, building it on first use."""
    if model_name not in _bart_pipeline_cache:
        from transformers import pipeline

        _bart_pipeline_cache[model_name] = pipeline("summarization", model=model_name)
    return _bart_pipeline_cache[model_name]


def abstractive_summarization_bart(text):
    """Generate an abstractive summary of *text* with facebook/bart-large-cnn.

    Args:
        text: The article to summarize.

    Returns:
        The generated summary string (decoded greedily, between 30 and 53
        tokens), or an empty string for blank input.
    """
    # Blank input has nothing to summarize; skip loading the model.
    if not text or not text.strip():
        return ""

    summarizer = _load_bart_pipeline()

    # truncation=True clips articles that exceed the model's input window
    # instead of failing on them.
    result = summarizer(
        text,
        max_length=53,
        min_length=30,
        do_sample=False,
        truncation=True,
    )

    return result[0]['summary_text']


def abstractive_summarization_llm(txt):
    """Summarize *txt* in about 20 words with a Google Gemini chat model.

    The API key is taken from the ``GOOGLE_API_KEY`` environment variable
    when it is already set; otherwise it is read from Colab's secret store
    and exported under that name.

    Args:
        txt: The text to summarize.

    Returns:
        The model's reply content, or an empty string for blank input.

    Raises:
        RuntimeError: If no API key is configured and Colab secrets are
            unavailable.
    """
    # Blank input has nothing to summarize; skip credentials and the API call.
    if not txt or not txt.strip():
        return ""

    import os

    if not os.environ.get("GOOGLE_API_KEY"):
        # NOTE(review): the original called `userdata.get(...)` without ever
        # importing `userdata`; assumed to be Colab's secrets helper - confirm.
        try:
            from google.colab import userdata
        except ImportError as err:
            raise RuntimeError(
                "GOOGLE_API_KEY is not set and Colab secrets are unavailable."
            ) from err
        os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

    try:
        from langchain_core.prompts import ChatPromptTemplate
        from langchain_google_genai import ChatGoogleGenerativeAI
    except ImportError:
        # Install the LangChain packages only when they are missing, using
        # plain Python rather than an IPython `!pip` magic.
        import subprocess
        import sys

        subprocess.check_call([
            sys.executable, "-m", "pip", "install", "--upgrade", "--quiet",
            "langchain", "langchain-google-genai", "beautifulsoup4",
        ])
        from langchain_core.prompts import ChatPromptTemplate
        from langchain_google_genai import ChatGoogleGenerativeAI

    def load_llm(model="gemini-1.5-pro"):
        # Temperature 0; no explicit token or time limits, at most 2 retries.
        return ChatGoogleGenerativeAI(
            model=model,
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=2,
        )

    def get_prompt_template():
        # System message carries the instruction; human message carries the text.
        return ChatPromptTemplate.from_messages(
            [
                ("system", "Write a concise summary of the following in {num_words} words:\n\n"),
                ("human", "{context}"),
            ]
        )

    def summarize_text(text, num_words=50, model="gemini-1.5-pro"):
        # Pipe the filled-in prompt into the chat model and return its reply text.
        chain = get_prompt_template() | load_llm(model)
        result = chain.invoke({
            "context": text,
            "num_words": num_words,
        })
        return result.content

    return summarize_text(txt, num_words=20, model="gemini-1.5-flash")


# (model, tokenizer) pairs keyed by checkpoint name, so the weights are
# loaded only once per process instead of on every request.
_t5_cache = {}


def _load_t5(model_name="t5-small"):
    """Return the cached (model, tokenizer) pair for *model_name*, loading it on first use."""
    if model_name not in _t5_cache:
        from transformers import T5ForConditionalGeneration, T5Tokenizer

        _t5_cache[model_name] = (
            T5ForConditionalGeneration.from_pretrained(model_name),
            T5Tokenizer.from_pretrained(model_name),
        )
    return _t5_cache[model_name]


def abstractive_summarization_t5(txt, max_length=150, min_length=40, num_beams=4):
    """Generate an abstractive summary of *txt* with the t5-small checkpoint.

    Args:
        txt: The text to summarize.
        max_length: Upper bound passed to ``generate`` for the summary length.
        min_length: Lower bound passed to ``generate`` for the summary length.
        num_beams: Beam width used for beam search.

    Returns:
        The decoded summary string, or an empty string for blank input.
    """
    # Blank input has nothing to summarize; skip loading the model.
    if not txt or not txt.strip():
        return ""

    model, tokenizer = _load_t5()

    # Prefix the text with the "summarize: " task marker and cap the encoded
    # input at 512 tokens.
    inputs = tokenizer.encode(
        "summarize: " + txt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )

    # Beam search with a length penalty of 2.0.
    summary_ids = model.generate(
        inputs,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=num_beams,
        early_stopping=True,
    )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def extractive_summarize_text(text, method):
    """Route *text* to the extractive summarizer named by *method*.

    Recognized labels are "Extractive LSA", "Extractive TFIDF" and
    "Extractive FREQUENCY"; any other value yields a prompt asking the user
    to pick a valid method.
    """
    # Label -> deferred call, checked in order; the lambdas keep each
    # summarizer from running until its label matches.
    routes = (
        ("Extractive LSA", lambda: extractive_summarization_lsa(text)),
        ("Extractive TFIDF", lambda: extractive_summarization_tfidf(text)),
        ("Extractive FREQUENCY", lambda: extractive_summarization_frequency(text)),
    )
    for label, run in routes:
        if method == label:
            return run()
    return "Please select a valid summarization method."


def abstractive_summarize_text(text, method):
    """Route *text* to the abstractive summarizer named by *method*.

    Recognized labels are "Abstractive BART", "Abstractive LLM" and
    "Abstractive T5"; any other value yields a prompt asking the user to
    pick a valid method.
    """
    # Label -> deferred call, checked in order; the lambdas keep each
    # summarizer from running until its label matches.
    routes = (
        ("Abstractive BART", lambda: abstractive_summarization_bart(text)),
        ("Abstractive LLM", lambda: abstractive_summarization_llm(text)),
        ("Abstractive T5", lambda: abstractive_summarization_t5(text)),
    )
    for label, run in routes:
        if method == label:
            return run()
    return "Please select a valid summarization method."



import gradio as gr

# Page styling: centre and enlarge the top-level heading.
css = """
h1 {
    margin-top: 2rem;
    font-size: 2rem;
    text-align: center;
}
"""

# Dropdown labels; these must match the strings the dispatch functions compare against.
EXTRACTIVE_METHODS = ["Extractive LSA", "Extractive FREQUENCY", "Extractive TFIDF"]
ABSTRACTIVE_METHODS = ["Abstractive BART", "Abstractive LLM", "Abstractive T5"]


def _build_summarizer_form(fn, methods):
    """Create one summarizer form: a text box, a method dropdown and a text output.

    A fresh input text box is created on every call, so each tab owns its
    own component instead of sharing a single instance across two Interfaces.
    NOTE(review): sharing one instance appeared to leave only one tab with a
    working input box - confirm against the Gradio version in use.
    """
    return gr.Interface(
        fn=fn,
        inputs=[
            gr.Text(label="Input Text", lines=10),
            gr.Dropdown(choices=methods, label="Select Method"),
        ],
        outputs=['text'],
        flagging_mode='never',
        submit_btn='Generate',
    )


with gr.Blocks(title="Summarizer App", css=css) as demo:
    gr.Markdown("# Summarizer App")

    with gr.Tabs():
        with gr.TabItem("Extractive"):
            _build_summarizer_form(extractive_summarize_text, EXTRACTIVE_METHODS)
        with gr.TabItem("Abstractive"):
            _build_summarizer_form(abstractive_summarize_text, ABSTRACTIVE_METHODS)


# To pin the host and port instead, pass the port as an integer:
#   demo.launch(server_name='localhost', server_port=8080)

demo.launch()

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.1/57.1 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.1/320.1 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.8/63.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m130.2/130.2 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

