<a href="https://colab.research.google.com/github/rishita1524-source/text_summarizer_app/blob/main/text_summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages
!pip install -q transformers torch sentencepiece sumy nltk

import nltk
nltk.download('punkt')
nltk.download('stopwords')

import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import time
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import textwrap

# Custom CSS for styling.
# A single <style> element holding every rule used by the HTML fragments the
# app renders (header, cards, method badge, spinner, progress bar).
# NOTE(review): .summary-app, .input-section and .btn-primary are not
# referenced by any markup visible in this cell — confirm before removing.
style = """
<style>
    .summary-app {
        font-family: 'Arial', sans-serif;
        max-width: 1200px;
        margin: 0 auto;
        padding: 20px;
    }
    .header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 30px;
        border-radius: 15px;
        text-align: center;
        margin-bottom: 30px;
        box-shadow: 0 4px 15px rgba(0,0,0,0.1);
    }
    .header h1 {
        margin: 0;
        font-size: 2.5em;
        font-weight: bold;
    }
    .header p {
        margin: 10px 0 0 0;
        font-size: 1.2em;
        opacity: 0.9;
    }
    .input-section, .output-section {
        background: white;
        padding: 25px;
        border-radius: 15px;
        margin-bottom: 25px;
        box-shadow: 0 2px 10px rgba(0,0,0,0.05);
        border: 1px solid #e0e0e0;
    }
    .control-panel {
        background: #f8f9fa;
        padding: 20px;
        border-radius: 10px;
        margin: 20px 0;
    }
    .stats-card {
        background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%);
        color: white;
        padding: 20px;
        border-radius: 10px;
        margin: 15px 0;
    }
    .summary-card {
        background: #f8f9fa;
        padding: 25px;
        border-radius: 10px;
        border-left: 5px solid #667eea;
        margin: 15px 0;
    }
    .method-badge {
        display: inline-block;
        background: #667eea;
        color: white;
        padding: 5px 15px;
        border-radius: 20px;
        font-size: 0.9em;
        margin: 5px;
    }
    .loading-spinner {
        display: inline-block;
        width: 20px;
        height: 20px;
        border: 3px solid #f3f3f3;
        border-top: 3px solid #667eea;
        border-radius: 50%;
        animation: spin 1s linear infinite;
        margin-right: 10px;
    }
    @keyframes spin {
        0% { transform: rotate(0deg); }
        100% { transform: rotate(360deg); }
    }
    .btn-primary {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        padding: 12px 30px;
        border-radius: 25px;
        font-size: 1.1em;
        cursor: pointer;
        transition: transform 0.2s;
    }
    .btn-primary:hover {
        transform: translateY(-2px);
        box-shadow: 0 5px 15px rgba(0,0,0,0.2);
    }
    .progress-bar {
        width: 100%;
        height: 6px;
        background: #e0e0e0;
        border-radius: 3px;
        overflow: hidden;
        margin: 10px 0;
    }
    .progress-fill {
        height: 100%;
        background: linear-gradient(90deg, #667eea, #764ba2);
        transition: width 0.3s ease;
    }
</style>
"""

# Emit the <style> element as cell output before any of the app's HTML is shown.
display(HTML(style))

class TextSummarizerApp:
    def __init__(self):
        """Create the app and immediately attempt to load all summarizers."""
        # Start from "not loaded"; setup_models() sets this to True only after
        # every summarizer has been created without an exception.
        self.models_loaded = False
        self.setup_models()

    def setup_models(self):
        """Create the BART pipeline and the four sumy summarizers.

        A temporary "Loading models..." widget is displayed while working and
        closed afterwards; a green success banner or a red error banner is
        then displayed.  ``self.models_loaded`` is ``True`` afterwards only if
        every step succeeded.  Failures are reported in the notebook output
        and are not re-raised.
        """
        import html  # stdlib; local import so no file-level import is required

        # Reset up front so a failed (re)load can never leave a stale True.
        self.models_loaded = False

        self.loading_status = widgets.HTML("<div class='loading-spinner'></div> Loading models...")
        display(self.loading_status)

        failure = None
        try:
            # Abstractive summarizer.
            self.bart_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

            # Extractive (sumy) summarizers all share one language, one
            # stemmer and one stop-word list, so build those once.
            self.language = "english"
            self.stemmer = Stemmer(self.language)
            stop_words = get_stop_words(self.language)

            for attr_name, summarizer_cls in (
                ("lsa_summarizer", LsaSummarizer),
                ("text_rank_summarizer", TextRankSummarizer),
                ("lex_rank_summarizer", LexRankSummarizer),
                ("luhn_summarizer", LuhnSummarizer),
            ):
                summarizer = summarizer_cls(self.stemmer)
                summarizer.stop_words = stop_words
                setattr(self, attr_name, summarizer)

            self.models_loaded = True
        except Exception as exc:
            # Deliberately broad: this is the UI boundary, and any loading
            # problem should become a visible banner instead of a traceback.
            failure = exc
        finally:
            # Remove the spinner whether loading worked or not.
            self.loading_status.close()

        if failure is None:
            display(HTML("<div style='color: green; text-align: center; padding: 20px;'>✅ Models loaded successfully!</div>"))
        else:
            # Escape the message: exception text may contain '<', '>' or '&'.
            display(HTML(f"<div style='color: red; text-align: center; padding: 20px;'>❌ Error loading models: {html.escape(str(failure))}</div>"))

    def create_ui(self):
        """Build and display the widget-based user interface.

        Creates the header, text area, sample-text button, settings controls
        (method, summary length, sentence count), the "Generate Summary"
        button, the progress bar and the output area, wires the two button
        callbacks, and displays everything in one vertical layout.

        ``self.text_input``, ``self.progress_bar`` and ``self.output_area``
        are stored on the instance because other methods read or update them.
        """

        # Header
        header = widgets.HTML("""
        <div class="header">
            <h1>📝 AI Text Summarizer</h1>
            <p>Transform long texts into concise summaries with advanced AI</p>
        </div>
        """)

        # Text input
        self.text_input = widgets.Textarea(
            value='',
            placeholder='Paste your text here... (Minimum 100 characters for best results)',
            description='',
            layout=widgets.Layout(width='100%', height='200px', margin='10px 0'),
            style={'description_width': 'initial'}
        )

        # Sample text button
        sample_text_btn = widgets.Button(
            description="📋 Load Sample Text",
            layout=widgets.Layout(width='200px', margin='5px')
        )
        sample_text_btn.on_click(self.load_sample_text)

        # Method selector.  Only methods that generate_summary() can actually
        # run are offered; a 't5' entry used to be listed here although no
        # T5 model is loaded or dispatched anywhere in this class.
        method_dropdown = widgets.Dropdown(
            options=[
                ('🤖 BART (AI - Recommended)', 'bart'),
                ('📊 TextRank (Extractive)', 'text_rank'),
                ('🔍 LSA (Extractive)', 'lsa'),
                ('⭐ LexRank (Extractive)', 'lex_rank'),
                ('📈 Luhn (Extractive)', 'luhn')
            ],
            value='bart',
            description='Method:',
            layout=widgets.Layout(width='100%', margin='10px 0'),
            style={'description_width': 'initial'}
        )

        # Passed to generate_summary() as max_length (used by BART only).
        length_slider = widgets.IntSlider(
            value=150,
            min=50,
            max=300,
            step=10,
            description='Summary Length:',
            layout=widgets.Layout(width='100%', margin='10px 0'),
            style={'description_width': 'initial'}
        )

        # Passed to generate_summary() as sentences_count (used by the
        # extractive methods only).
        sentences_slider = widgets.IntSlider(
            value=3,
            min=1,
            max=10,
            step=1,
            description='Sentences:',
            layout=widgets.Layout(width='100%', margin='10px 0'),
            style={'description_width': 'initial'}
        )

        # Settings panel.  The container itself carries the CSS class: an
        # opening <div> in one HTML widget and a closing </div> in another
        # cannot wrap the widgets placed between them, because every HTML
        # widget renders its markup in its own element.
        settings_panel = widgets.VBox([
            widgets.HTML("<h3>⚙️ Settings</h3>"),
            method_dropdown,
            length_slider,
            sentences_slider
        ])
        settings_panel.add_class('control-panel')

        # Summarize button
        summarize_btn = widgets.Button(
            description="🚀 Generate Summary",
            button_style='success',
            layout=widgets.Layout(width='250px', height='50px', margin='20px 0'),
            style={'font_weight': 'bold'}
        )
        # Read the control values at click time, not at wiring time.
        summarize_btn.on_click(lambda x: self.generate_summary(method_dropdown.value, length_slider.value, sentences_slider.value))

        # Progress bar (rewritten by update_progress)
        self.progress_bar = widgets.HTML("""
        <div class="progress-bar">
            <div class="progress-fill" style="width: 0%"></div>
        </div>
        """)

        # Output area (generate_summary renders into it)
        self.output_area = widgets.Output(layout=widgets.Layout(width='100%', margin='20px 0'))

        # Assemble UI
        input_section = widgets.VBox([
            widgets.HTML("<h3>📥 Input Text</h3>"),
            self.text_input,
            sample_text_btn,
            settings_panel,
            summarize_btn,
            self.progress_bar
        ], layout=widgets.Layout(margin='20px 0'))

        # Display everything
        display(widgets.VBox([
            header,
            input_section,
            self.output_area
        ]))

    def load_sample_text(self, btn):
        """Load sample text for demonstration"""
        sample_text = """
Artificial intelligence (AI) is transforming our world in remarkable ways. From healthcare to transportation, education to entertainment, AI systems are being deployed to solve complex problems and enhance human capabilities.

In healthcare, AI algorithms can analyze medical images with incredible accuracy, often detecting diseases like cancer earlier than human doctors. Machine learning models process vast amounts of patient data to identify patterns and predict health risks, enabling preventative care and personalized treatment plans.

The transportation sector is undergoing a revolution with self-driving cars and intelligent traffic management systems. These AI-powered solutions promise to reduce accidents, optimize routes, and decrease congestion, ultimately making our roads safer and more efficient.

In education, AI tutors provide personalized learning experiences, adapting to each student's pace and learning style. Natural language processing enables intelligent chatbots to answer student questions 24/7, while machine learning algorithms identify areas where students struggle and suggest targeted interventions.

Businesses leverage AI for everything from customer service automation to supply chain optimization. Chatbots handle routine inquiries, recommendation systems personalize shopping experiences, and predictive analytics help companies anticipate market trends and customer needs.

Despite these advancements, AI also presents challenges. Ethical concerns around privacy, bias in algorithms, and the potential impact on employment require careful consideration. Researchers and policymakers are working to establish guidelines that ensure AI develops in ways that benefit all of humanity.

The future of AI holds even more promise. As technology advances, we can expect AI to help solve global challenges like climate change, disease outbreaks, and food security. The key will be developing AI systems that are transparent, accountable, and aligned with human values.
"""
        self.text_input.value = sample_text

    def update_progress(self, value):
        """Update progress bar"""
        self.progress_bar.value = f"""
        <div class="progress-bar">
            <div class="progress-fill" style="width: {value}%"></div>
        </div>
        """

    def generate_summary(self, method, max_length, sentences_count):
        """Summarize the text in the input box and render the result.

        Args:
            method: Summarizer key: 'bart', 'text_rank', 'lsa', 'lex_rank'
                or 'luhn'.  Any other value is reported as an error.
            max_length: Passed to ``bart_summarize`` as ``max_length``;
                not used by the extractive methods.
            sentences_count: Passed to ``sumy_summarize``; not used by BART.

        Returns nothing.  The summary, its statistics and any error message
        are displayed inside ``self.output_area``; exceptions raised while
        summarizing are caught and shown as a red banner.
        """
        import html  # stdlib; local import so no file-level import is required

        def error_banner(message):
            # One template for every red banner; `message` must be HTML-safe.
            return HTML(f"<div style='color: red; text-align: center; padding: 20px;'>❌ {message}</div>")

        if not self.models_loaded:
            with self.output_area:
                clear_output()
                display(error_banner("Models not loaded properly. Please refresh and try again."))
            return

        text = self.text_input.value.strip()
        if len(text) < 50:
            with self.output_area:
                clear_output()
                display(error_banner("Please enter at least 50 characters of text."))
            return

        # Display names for the result card.
        method_names = {
            'bart': 'BART AI',
            'text_rank': 'TextRank',
            'lsa': 'LSA',
            'lex_rank': 'LexRank',
            'luhn': 'Luhn'
        }
        # Methods handled by sumy_summarize(); the key is passed through as-is.
        extractive_methods = ('text_rank', 'lsa', 'lex_rank', 'luhn')

        with self.output_area:
            clear_output()
            self.update_progress(10)

            # Show loading message
            loading_html = """
            <div style='text-align: center; padding: 30px;'>
                <div class='loading-spinner' style='width: 40px; height: 40px; margin: 20px auto;'></div>
                <h3>Generating Summary...</h3>
                <p>AI is processing your text. This may take a few seconds.</p>
            </div>
            """
            display(HTML(loading_html))

            try:
                self.update_progress(40)

                # Generate summary based on selected method
                if method == 'bart':
                    summary = self.bart_summarize(text, max_length)
                elif method in extractive_methods:
                    summary = self.sumy_summarize(text, method, sentences_count)
                else:
                    # Fail loudly instead of rendering a placeholder string
                    # as if it were a real summary with statistics.
                    raise ValueError(f"Unknown summarization method: {method!r}")

                self.update_progress(80)

                # Calculate statistics (whitespace-separated word counts)
                original_words = len(text.split())
                summary_words = len(summary.split())
                compression_ratio = (1 - summary_words / original_words) * 100 if original_words > 0 else 0

                self.update_progress(100)

                # The summary is derived from user-supplied text, so escape it
                # before embedding it in HTML.
                safe_summary = html.escape(summary)

                results_html = f"""
                <div class="output-section">
                    <h3>📊 Summary Results</h3>

                    <div class="stats-card">
                        <div style="display: flex; justify-content: space-between; flex-wrap: wrap;">
                            <div style="text-align: center;">
                                <h4>Method</h4>
                                <div class="method-badge">{method_names[method]}</div>
                            </div>
                            <div style="text-align: center;">
                                <h4>Compression</h4>
                                <h3>{compression_ratio:.1f}%</h3>
                            </div>
                            <div style="text-align: center;">
                                <h4>Original</h4>
                                <h3>{original_words} words</h3>
                            </div>
                            <div style="text-align: center;">
                                <h4>Summary</h4>
                                <h3>{summary_words} words</h3>
                            </div>
                        </div>
                    </div>

                    <div class="summary-card">
                        <h4>📖 Generated Summary:</h4>
                        <p style="line-height: 1.6; font-size: 1.1em; color: #333;">{safe_summary}</p>
                    </div>

                    <div style="background: #e8f5e8; padding: 15px; border-radius: 10px; margin-top: 20px;">
                        <h4>💡 Tips:</h4>
                        <ul>
                            <li>For longer texts, use BART method for best results</li>
                            <li>Adjust summary length based on your needs</li>
                            <li>Extractive methods work well for factual content</li>
                            <li>AI methods are better for creative rewriting</li>
                        </ul>
                    </div>
                </div>
                """

                clear_output()
                display(HTML(results_html))

            except Exception as e:
                # UI boundary: show the failure to the user instead of raising
                # into the widget callback.
                self.update_progress(0)
                clear_output()
                display(error_banner(f"Error generating summary: {html.escape(str(e))}"))

    def bart_summarize(self, text, max_length=150):
        """Summarize using BART model"""
        if len(text) > 1024:
            # Handle long texts by taking first 1024 characters
            text = text[:1024]

        result = self.bart_summarizer(text, max_length=max_length, min_length=30, do_sample=False)
        return result[0]['summary_text']

    def sumy_summarize(self, text, method, sentences_count=3):
        """Run one of the extractive sumy summarizers over ``text``.

        ``method`` selects the summarizer ("lsa", "text_rank", "lex_rank" or
        "luhn"); any other value returns the literal string "Invalid method".
        The selected sentences are returned joined by single spaces.
        """
        parser = PlaintextParser.from_string(text, Tokenizer(self.language))

        # Method key -> name of the instance attribute holding the summarizer.
        attr_by_method = (
            ("lsa", "lsa_summarizer"),
            ("text_rank", "text_rank_summarizer"),
            ("lex_rank", "lex_rank_summarizer"),
            ("luhn", "luhn_summarizer"),
        )
        for key, attr_name in attr_by_method:
            if method == key:
                summarizer = getattr(self, attr_name)
                break
        else:
            return "Invalid method"

        picked = summarizer(parser.document, sentences_count)
        return ' '.join(map(str, picked))

# Create the app (this loads the models) and display its UI.
print("🚀 Starting Text Summarizer App...")
app = TextSummarizerApp()
app.create_ui()

# Usage instructions shown below the app.  The method guide covers every
# summarizer the class implements, including Luhn.
display(HTML("""
<div style="background: #fff3cd; padding: 20px; border-radius: 10px; border-left: 5px solid #ffc107; margin: 20px 0;">
    <h3>🎯 How to Use:</h3>
    <ol>
        <li><strong>Enter Text</strong>: Paste or type your text in the input box above</li>
        <li><strong>Choose Method</strong>: Select your preferred summarization method</li>
        <li><strong>Adjust Settings</strong>: Set summary length or number of sentences</li>
        <li><strong>Generate</strong>: Click the "Generate Summary" button</li>
        <li><strong>View Results</strong>: See your summary with statistics and analysis</li>
    </ol>

    <h4>🔧 Method Guide:</h4>
    <ul>
        <li><strong>BART</strong>: Best quality, uses AI for abstractive summarization</li>
        <li><strong>TextRank</strong>: Good for factual content, extracts important sentences</li>
        <li><strong>LSA</strong>: Uses semantic analysis to find key content</li>
        <li><strong>LexRank</strong>: Graph-based method for sentence importance</li>
        <li><strong>Luhn</strong>: Scores sentences by the frequency of significant words</li>
    </ul>
</div>
"""))

  Preparing metadata (setup.py) ... done
  Preparing metadata (setup.py) ... done
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 97.3/97.3 kB 5.7 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 60.5 MB/s eta 0:00:00
  Building wheel for breadability (setup.py) ... done
  Building wheel for docopt (setup.py) ... done


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


🚀 Starting Text Summarizer App...


HTML(value="<div class='loading-spinner'></div> Loading models...")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


VBox(children=(HTML(value='\n        <div class="header">\n            <h1>📝 AI Text Summarizer</h1>\n        …