In [1]:
!pip install -q PyMuPDF

import gradio as gr
import fitz  # PyMuPDF
import re, os, zipfile
import matplotlib.pyplot as plt
from transformers import pipeline

# Load the BART summarization pipeline once at import time so every request
# handled by summarize_files() reuses the same model instance.
# NOTE(review): the first run appears to download the model weights from the
# Hugging Face Hub, so startup can be slow — confirm for your environment.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Stock phrases treated as markers of "AI-style" academic writing. They are
# stored in lowercase because ai_content_score() matches them against a
# lowercased copy of the text.
ai_phrases = [
    "this paper presents", "we propose", "in this study", "the purpose of", "the aim of this paper",
    "based on the results", "in conclusion", "extensively studied", "significant improvement",
    "state-of-the-art", "our findings", "the research indicates", "demonstrates the effectiveness",
    "in recent years", "machine learning", "language model", "transformer model"
]

def ai_content_score(text):
    """Return the percentage of ``ai_phrases`` that occur in *text*.

    Matching is a case-insensitive substring test, and each phrase counts at
    most once no matter how often it appears.

    Args:
        text: The text to score.

    Returns:
        A number in the range 0-100, rounded to two decimals. Returns 0.0
        when ``ai_phrases`` is empty.
    """
    if not ai_phrases:
        # Avoid ZeroDivisionError if the phrase list is ever emptied.
        return 0.0
    # Lowercase once instead of once per phrase.
    lowered = text.lower()
    hits = sum(1 for phrase in ai_phrases if phrase in lowered)
    return round(min(100, (hits / len(ai_phrases)) * 100), 2)

def extract_abstract(text):
    """Extract the abstract section from the full text of a paper.

    Newlines are flattened to spaces, then three patterns are tried in order
    (all case-insensitive):

    1. The word "abstract" followed by 100-1500 characters of content.
    2. A letter-spaced heading ("A B S T R A C T") followed by 100-1500
       characters of content.
    3. Either heading form followed by any amount of content up to the next
       section marker (introduction, background, methods, "1." or keywords).
       This catches abstracts too short for the first two patterns.

    Args:
        text: Full document text.

    Returns:
        The stripped abstract. If no pattern yields non-empty content, returns
        a warning message followed by the first 500 words of the document.
    """
    text = text.replace('\n', ' ')
    spaced_heading = r'a\s*b\s*s\s*t\s*r\s*a\s*c\s*t'
    patterns = [
        r'abstract[\s:\-]*([\s\S]{100,1500})',
        spaced_heading + r'[\s:\-]*([\s\S]{100,1500})',
        # The heading is non-capturing so that group(1) is always the content,
        # never the heading word itself.
        r'(?:abstract|' + spaced_heading + r')[\s:\-]*(.*?)'
        r'(?=introduction|background|methods|1\.|\bkeywords\b)',
    ]
    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            candidate = match.group(1).strip()
            # An empty capture (heading immediately followed by the next
            # section) is not a usable abstract; keep looking.
            if candidate:
                return candidate
    fallback = "‚ö† Abstract not clearly found. Showing intro content:\n\n" + ' '.join(text.split()[:500])
    return fallback.strip()

def extract_text(filepath):
    """Read the text content of an uploaded file.

    The file type is chosen from the (case-insensitive) file extension.

    Args:
        filepath: Path to a ``.pdf``, ``.txt`` or ``.csv`` file.

    Returns:
        The concatenated text of all pages for a PDF, the decoded file
        contents for TXT/CSV, or an empty string for any other extension.
    """
    filename = os.path.basename(filepath).lower()
    if filename.endswith('.pdf'):
        with fitz.open(filepath) as doc:
            # join() avoids the quadratic cost of repeated string +=.
            return "".join(page.get_text() for page in doc)
    if filename.endswith(('.txt', '.csv')):
        # errors='replace' keeps one badly encoded upload from raising
        # UnicodeDecodeError and aborting the whole batch.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()
    return ""

def _save_score_chart(filename, orig_score, sum_score, chart_path):
    """Render the AI-style score bar chart for one file and save it to chart_path."""
    labels = ['Original Abstract', 'Summarized Abstract']
    scores = [orig_score, sum_score]
    plt.figure(figsize=(6, 4))
    try:
        bars = plt.bar(labels, scores, color=['red', 'green'])
        for bar in bars:
            yval = bar.get_height()
            plt.text(bar.get_x() + 0.1, yval + 2, f"{yval:.2f}%", fontsize=12)
        plt.title(f'ü§ñ AI-Content Score: {filename}')
        plt.ylabel("AI-style Score (%)")
        plt.ylim(0, 100)
        plt.tight_layout()
        plt.savefig(chart_path)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close()

def summarize_files(filepaths):
    """Extract, summarize and package the abstracts of the uploaded files.

    For each file the abstract is extracted and, unless extraction fell back
    to the warning text, summarized with the module-level ``summarizer``.
    A ``summary_<name>.txt`` report is written per file. After a successful
    summarization a ``summary_<name>_chart.png`` score chart is written too.
    All generated files are bundled into ``all_summaries.zip``.

    Side effects: files in the current working directory whose names start
    with ``summary_``, and any previous ``all_summaries.zip``, are deleted at
    the start of every call.

    Args:
        filepaths: List of uploaded file paths (may be empty or None).

    Returns:
        A ``(text, update)`` tuple: the combined report text for the output
        textbox, and a ``gr.update`` for the download component (hidden when
        no files were supplied, otherwise pointing at the ZIP archive).
    """
    zip_filename = "all_summaries.zip"

    # Clean up artifacts left over from the previous run.
    for stale in os.listdir():
        if stale.startswith("summary_") and os.path.isfile(stale):
            os.remove(stale)
    if os.path.exists(zip_filename):
        os.remove(zip_filename)

    if not filepaths:
        return "‚ùó Please upload at least one file.", gr.update(visible=False)

    artifacts = []
    output_parts = []
    for filepath in filepaths:
        filename = os.path.basename(filepath)
        full_text = extract_text(filepath)
        abstract = extract_abstract(full_text)
        output_parts.append(f"\nüìå Abstract from {filename}:\n{abstract}\n")

        chart_note = ""
        if "‚ö†" in abstract:
            # Extraction fell back to the warning text; nothing to summarize.
            summary_text = abstract
        else:
            try:
                result = summarizer(abstract[:3000], max_length=180, min_length=80, do_sample=False)
                summary_text = result[0]['summary_text']
            except Exception as e:
                summary_text = f"‚ö† Error during summarization: {e}"
            else:
                # Charting is best-effort and kept out of the summarization
                # try-block so a plotting failure cannot discard a good summary.
                chart_path = f"summary_{filename}_chart.png"
                try:
                    _save_score_chart(
                        filename,
                        ai_content_score(abstract),
                        ai_content_score(summary_text),
                        chart_path,
                    )
                    artifacts.append(chart_path)
                except Exception as e:
                    chart_note = f"(Score chart could not be generated: {e})\n"

        output_parts.append(
            f"üß† Summarized Abstract:\n{summary_text}\n{chart_note}" + "\n" + "-" * 80 + "\n"
        )

        fname = f"summary_{filename}.txt"
        # Explicit UTF-8: the report contains non-ASCII characters that the
        # platform default encoding may be unable to represent.
        with open(fname, "w", encoding="utf-8") as f:
            f.write(f"üìÑ File: {filename}\n\n")
            f.write("üìå Extracted Abstract:\n" + abstract + "\n\n")
            f.write("üß† Summarized Abstract:\n" + summary_text)
        artifacts.append(fname)

    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for path in artifacts:
            zipf.write(path)

    return "".join(output_parts), gr.update(value=zip_filename, visible=True)

# ================== Gradio App ===================

# NOTE(security): credentials are hard-coded in plaintext and this notebook is
# served on a public share link. Move them to a secret store or environment
# variables before real use.
# NOTE(review): the e-mail/password pairing below is assumed from the order of
# the two original lists — confirm.
_VALID_LOGINS = {
    "Naveena@gmail.com": "1234",
    "Harishma@gmail.com": "5678",
}

def _handle_login(email, password):
    """Validate a login attempt and return the UI updates for it.

    The password must be the one registered for the given e-mail address;
    previously any known e-mail was accepted with any known password.

    Returns:
        A tuple of (status message, update for the main tabs, update for the
        login tab).
    """
    if email in _VALID_LOGINS and _VALID_LOGINS[email] == password:
        return (
            "‚úÖ Login successful! Redirecting...",
            gr.update(visible=True),
            gr.update(visible=False),
        )
    return "‚ùå Invalid credentials. Try again.", gr.update(visible=False), gr.update()

with gr.Blocks() as demo:
    # Login screen: shown first; hidden after a successful login.
    with gr.Tab("Login", id="login_tab") as login_tab:
        email_input = gr.Textbox(label="Email")
        password_input = gr.Textbox(label="Password", type="password")
        login_button = gr.Button("üîê Login")
        login_output = gr.Textbox(label="Login Status", interactive=False)

    # Main application tabs: hidden until login succeeds.
    with gr.Tabs(visible=False) as main_tabs:
        with gr.Tab("Home"):
            gr.Markdown("# üìö Welcome to Abstract Summarizer using GenAI!")
            gr.Markdown("""
Welcome to the GenAI-powered Abstract Summarizer, your intelligent assistant for academic research!

This tool helps you:
- üß† Automatically extract abstracts from research papers (PDF, TXT, CSV)
- ‚úç Summarize them into 7‚Äì8 sentence human-like summaries
- üìä Evaluate the writing style for AI-generated patterns
- üì¶ Download your results as a ZIP archive

---

### üöÄ How to Use
1. Go to the Service tab
2. Upload one or more research files
3. Click Summarize
4. View results: original abstract, summary, AI-style chart
5. Download all summaries as a .zip file

> Ready to begin? Head to the Service tab and upload your files to get started!
""")

        with gr.Tab("Service"):
            with gr.Column():
                uploader = gr.File(label="üì§ Upload Files", file_count="multiple", type="filepath")
                summarize_button = gr.Button("üß† Summarize")
                output_text = gr.Textbox(label="üìù Summary Output", interactive=False, lines=10)
                download_button = gr.File(label="Download Summaries", interactive=False, visible=False)
                # summarize_files returns (report text, update for the download component).
                summarize_button.click(summarize_files, inputs=[uploader], outputs=[output_text, download_button])

        with gr.Tab("About"):
            gr.Markdown("### üìò About This Project")
            gr.Markdown("""
This GenAI-powered application assists researchers, students, and academics in simplifying the process of reading scholarly papers.

üîç Core Features
- Abstract extraction from PDF/TXT/CSV
- Summarization using BART (facebook/bart-large-cnn)
- AI-style scoring of content
- Multi-file upload
- ZIP download support
""")

        with gr.Tab("Profile"):
            gr.HTML("""
            <style>
            @media screen and (max-width: 600px) {
                .profile-card {
                    padding: 15px;
                    font-size: 14px;
                }
            }
            </style>
            <div class="profile-card" style="
                border: 2px solid #007bff;
                border-radius: 12px;
                padding: 20px;
                max-width: 90%;
                margin: auto;
                background: #f0f8ff;
                box-sizing: border-box;
                word-wrap: break-word;">
                <h3 style="color: #0056b3;">üë§ Research Analyst</h3>
                <p style="color: #0056b3;"><strong style="color: #0056b3;">Email:</strong> <a href="mailto:logged@example.com">logged@example.com</a></p>
                <p style="color: #0056b3;"><strong style="color: #0056b3;">Role:</strong> Academic Research Specialist</p>
                <p style="color: #0056b3;"><strong style="color: #0056b3;">Interests:</strong> Machine Learning, Natural Language Processing, Data Science</p>
                <p style="color: #0056b3;"><strong style="color: #0056b3;">Member since:</strong> July 2025</p>
                <hr>
                <p style="font-style: italic; color: #555;">"Driving insights through AI-powered research tools."</p>
            </div>
            """)

    # Login logic: on success reveal the main tabs and hide the login tab;
    # on failure keep the main tabs hidden and leave the login tab unchanged.
    login_button.click(
        fn=_handle_login,
        inputs=[email_input, password_input],
        outputs=[login_output, main_tabs, login_tab]
    )

# Launch the Gradio app. No arguments are passed, so Gradio's defaults apply;
# per the recorded cell output, on a hosted notebook (Colab) Gradio switches to
# share=True automatically and serves the app on a temporary public URL.
demo.launch()

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m24.1/24.1 MB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://490ea4b37b427c0d5e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


